diff --git a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
index fdba204fbe7f..0e163f3161a3 100644
--- a/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm-project/clang/lib/AST/ASTContext.cpp
@@ -1,11801 +1,11809 @@
//===- ASTContext.cpp - Context to hold long-lived AST nodes --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTContext interface.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "CXXABI.h"
#include "Interp/Context.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTConcept.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/Attr.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Comment.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclContextInternals.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/DependenceFlags.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/MangleNumberingContext.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Linkage.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/NoSanitizeList.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/XRayLists.h"
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
using namespace clang;
enum FloatingRank {
BFloat16Rank, Float16Rank, HalfRank, FloatRank, DoubleRank, LongDoubleRank, Float128Rank
};
/// \returns location that is relevant when searching for Doc comments related
/// to \p D.
static SourceLocation getDeclLocForCommentSearch(const Decl *D,
SourceManager &SourceMgr) {
assert(D);
// User can not attach documentation to implicit declarations.
if (D->isImplicit())
return {};
// User can not attach documentation to implicit instantiations.
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(D)) {
if (CRD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
TemplateSpecializationKind TSK = CTSD->getSpecializationKind();
if (TSK == TSK_ImplicitInstantiation ||
TSK == TSK_Undeclared)
return {};
}
if (const auto *ED = dyn_cast<EnumDecl>(D)) {
if (ED->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return {};
}
if (const auto *TD = dyn_cast<TagDecl>(D)) {
// When a tag declaration (but not a definition!) is part of the
// decl-specifier-seq of some other declaration, it doesn't get a comment.
if (TD->isEmbeddedInDeclarator() && !TD->isCompleteDefinition())
return {};
}
// TODO: handle comments for function parameters properly.
if (isa<ParmVarDecl>(D))
return {};
// TODO: we could look up template parameter documentation in the template
// documentation.
if (isa<TemplateTypeParmDecl>(D) ||
isa<NonTypeTemplateParmDecl>(D) ||
isa<TemplateTemplateParmDecl>(D))
return {};
// Find declaration location.
// For Objective-C declarations we generally don't expect to have multiple
// declarators, thus use declaration starting location as the "declaration
// location".
// For all other declarations multiple declarators are used quite frequently,
// so we use the location of the identifier as the "declaration location".
if (isa<ObjCMethodDecl>(D) || isa<ObjCContainerDecl>(D) ||
isa<ObjCPropertyDecl>(D) ||
isa<RedeclarableTemplateDecl>(D) ||
isa<ClassTemplateSpecializationDecl>(D) ||
// Allow association with Y across {} in `typedef struct X {} Y`.
isa<TypedefDecl>(D))
return D->getBeginLoc();
else {
const SourceLocation DeclLoc = D->getLocation();
if (DeclLoc.isMacroID()) {
if (isa<TypedefDecl>(D)) {
// If the location of the typedef name is in a macro, it is because it is
// being declared via a macro. Try using the declaration's starting location
// as the "declaration location".
return D->getBeginLoc();
} else if (const auto *TD = dyn_cast<TagDecl>(D)) {
// If location of the tag decl is inside a macro, but the spelling of
// the tag name comes from a macro argument, it looks like a special
// macro like NS_ENUM is being used to define the tag decl. In that
// case, adjust the source location to the expansion loc so that we can
// attach the comment to the tag decl.
if (SourceMgr.isMacroArgExpansion(DeclLoc) &&
TD->isCompleteDefinition())
return SourceMgr.getExpansionLoc(DeclLoc);
}
}
return DeclLoc;
}
return {};
}
RawComment *ASTContext::getRawCommentForDeclNoCacheImpl(
const Decl *D, const SourceLocation RepresentativeLocForDecl,
const std::map<unsigned, RawComment *> &CommentsInTheFile) const {
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (RepresentativeLocForDecl.isInvalid() ||
!RepresentativeLocForDecl.isFileID())
return nullptr;
// If there are no comments anywhere, we won't find anything.
if (CommentsInTheFile.empty())
return nullptr;
// Decompose the location for the declaration and find the beginning of the
// file buffer.
const std::pair<FileID, unsigned> DeclLocDecomp =
SourceMgr.getDecomposedLoc(RepresentativeLocForDecl);
// Slow path.
auto OffsetCommentBehindDecl =
CommentsInTheFile.lower_bound(DeclLocDecomp.second);
// First check whether we have a trailing comment.
if (OffsetCommentBehindDecl != CommentsInTheFile.end()) {
RawComment *CommentBehindDecl = OffsetCommentBehindDecl->second;
if ((CommentBehindDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) &&
CommentBehindDecl->isTrailingComment() &&
(isa<FieldDecl>(D) || isa<EnumConstantDecl>(D) || isa<VarDecl>(D) ||
isa<ObjCMethodDecl>(D) || isa<ObjCPropertyDecl>(D))) {
// Check that Doxygen trailing comment comes after the declaration, starts
// on the same line and in the same file as the declaration.
if (SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) ==
Comments.getCommentBeginLine(CommentBehindDecl, DeclLocDecomp.first,
OffsetCommentBehindDecl->first)) {
return CommentBehindDecl;
}
}
}
// The comment just after the declaration was not a trailing comment.
// Let's look at the previous comment.
if (OffsetCommentBehindDecl == CommentsInTheFile.begin())
return nullptr;
auto OffsetCommentBeforeDecl = --OffsetCommentBehindDecl;
RawComment *CommentBeforeDecl = OffsetCommentBeforeDecl->second;
// Check that we actually have a non-member Doxygen comment.
if (!(CommentBeforeDecl->isDocumentation() ||
LangOpts.CommentOpts.ParseAllComments) ||
CommentBeforeDecl->isTrailingComment())
return nullptr;
// Decompose the end of the comment.
const unsigned CommentEndOffset =
Comments.getCommentEndOffset(CommentBeforeDecl);
// Get the corresponding buffer.
bool Invalid = false;
const char *Buffer = SourceMgr.getBufferData(DeclLocDecomp.first,
&Invalid).data();
if (Invalid)
return nullptr;
// Extract text between the comment and declaration.
StringRef Text(Buffer + CommentEndOffset,
DeclLocDecomp.second - CommentEndOffset);
// There should be no other declarations or preprocessor directives between
// comment and declaration.
if (Text.find_first_of(";{}#@") != StringRef::npos)
return nullptr;
return CommentBeforeDecl;
}
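// Illustrative aside, not from the upstream file: the lookup above keys
// comments by file offset and uses std::map::lower_bound to find the first
// comment at or after the declaration; the entry just before it, if any, is
// the closest comment preceding the declaration. A minimal stand-in for that
// technique with plain offsets, using only <map> (already included above);
// all names below are made up for illustration.
namespace comment_offset_lookup_sketch {
inline const char *nearestCommentBefore(
    const std::map<unsigned, const char *> &CommentsByOffset,
    unsigned DeclOffset) {
  // First comment starting at or after the declaration (candidate trailer).
  auto Behind = CommentsByOffset.lower_bound(DeclOffset);
  if (Behind == CommentsByOffset.begin())
    return nullptr; // nothing precedes the declaration
  --Behind;         // closest comment that starts before the declaration
  return Behind->second;
}
} // namespace comment_offset_lookup_sketch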
RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
// If the declaration doesn't map directly to a location in a file, we
// can't find the comment.
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
return nullptr;
if (ExternalSource && !CommentsLoaded) {
ExternalSource->ReadComments();
CommentsLoaded = true;
}
if (Comments.empty())
return nullptr;
const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first;
const auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty())
return nullptr;
return getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile);
}
void ASTContext::addComment(const RawComment &RC) {
assert(LangOpts.RetainCommentsFromSystemHeaders ||
!SourceMgr.isInSystemHeader(RC.getSourceRange().getBegin()));
Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
}
/// If we have a 'templated' declaration for a template, adjust 'D' to
/// refer to the actual template.
/// If we have an implicit instantiation, adjust 'D' to refer to the template.
static const Decl &adjustDeclToTemplate(const Decl &D) {
if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
// Is this function declaration part of a function template?
if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
return *FTD;
// Nothing to do if function is not an implicit instantiation.
if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
return D;
// Function is an implicit instantiation of a function template?
if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
return *FTD;
// Function is instantiated from a member definition of a class template?
if (const FunctionDecl *MemberDecl =
FD->getInstantiatedFromMemberFunction())
return *MemberDecl;
return D;
}
if (const auto *VD = dyn_cast<VarDecl>(&D)) {
// Static data member is instantiated from a member definition of a class
// template?
if (VD->isStaticDataMember())
if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
return *MemberDecl;
return D;
}
if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
// Is this class declaration part of a class template?
if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
return *CTD;
// Class is an implicit instantiation of a class template or partial
// specialization?
if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
return D;
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
PU = CTSD->getSpecializedTemplateOrPartial();
return PU.is<ClassTemplateDecl *>()
? *static_cast<const Decl *>(PU.get<ClassTemplateDecl *>())
: *static_cast<const Decl *>(
PU.get<ClassTemplatePartialSpecializationDecl *>());
}
// Class is instantiated from a member definition of a class template?
if (const MemberSpecializationInfo *Info =
CRD->getMemberSpecializationInfo())
return *Info->getInstantiatedFrom();
return D;
}
if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
// Enum is instantiated from a member definition of a class template?
if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
return *MemberDecl;
return D;
}
// FIXME: Adjust alias templates?
return D;
}
const RawComment *ASTContext::getRawCommentForAnyRedecl(
const Decl *D,
const Decl **OriginalDecl) const {
if (!D) {
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
D = &adjustDeclToTemplate(*D);
// Any comment directly attached to D?
{
auto DeclComment = DeclRawComments.find(D);
if (DeclComment != DeclRawComments.end()) {
if (OriginalDecl)
*OriginalDecl = D;
return DeclComment->second;
}
}
// Any comment attached to any redeclaration of D?
const Decl *CanonicalD = D->getCanonicalDecl();
if (!CanonicalD)
return nullptr;
{
auto RedeclComment = RedeclChainComments.find(CanonicalD);
if (RedeclComment != RedeclChainComments.end()) {
if (OriginalDecl)
*OriginalDecl = RedeclComment->second;
auto CommentAtRedecl = DeclRawComments.find(RedeclComment->second);
assert(CommentAtRedecl != DeclRawComments.end() &&
"This decl is supposed to have comment attached.");
return CommentAtRedecl->second;
}
}
// Any redeclarations of D that we haven't checked for comments yet?
// We can't use a DenseMap::iterator directly, since it could be invalidated.
auto LastCheckedRedecl = [this, CanonicalD]() -> const Decl * {
auto LookupRes = CommentlessRedeclChains.find(CanonicalD);
if (LookupRes != CommentlessRedeclChains.end())
return LookupRes->second;
return nullptr;
}();
for (const auto Redecl : D->redecls()) {
assert(Redecl);
// Skip all redeclarations that have been checked previously.
if (LastCheckedRedecl) {
if (LastCheckedRedecl == Redecl) {
LastCheckedRedecl = nullptr;
}
continue;
}
const RawComment *RedeclComment = getRawCommentForDeclNoCache(Redecl);
if (RedeclComment) {
cacheRawCommentForDecl(*Redecl, *RedeclComment);
if (OriginalDecl)
*OriginalDecl = Redecl;
return RedeclComment;
}
CommentlessRedeclChains[CanonicalD] = Redecl;
}
if (OriginalDecl)
*OriginalDecl = nullptr;
return nullptr;
}
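// Illustrative aside, not from the upstream file: the loop above resumes
// scanning the redeclaration chain where an earlier query stopped, recording
// progress per canonical declaration so redeclarations are never re-examined.
// A simplified sketch of that resumable-scan idea over plain integers, using
// only headers already included here (ArrayRef, <map>); names are made up.
namespace resumable_scan_sketch {
template <typename Pred>
const int *findSkippingChecked(llvm::ArrayRef<int> Chain,
                               std::map<int, std::size_t> &NextUnchecked,
                               int CanonicalId, Pred Matches) {
  std::size_t &Start = NextUnchecked[CanonicalId]; // 0 on the first query
  for (std::size_t I = Start; I != Chain.size(); ++I) {
    Start = I + 1; // remember progress even when nothing matches
    if (Matches(Chain[I]))
      return &Chain[I];
  }
  return nullptr;
}
} // namespace resumable_scan_sketch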
void ASTContext::cacheRawCommentForDecl(const Decl &OriginalD,
const RawComment &Comment) const {
assert(Comment.isDocumentation() || LangOpts.CommentOpts.ParseAllComments);
DeclRawComments.try_emplace(&OriginalD, &Comment);
const Decl *const CanonicalDecl = OriginalD.getCanonicalDecl();
RedeclChainComments.try_emplace(CanonicalDecl, &OriginalD);
CommentlessRedeclChains.erase(CanonicalDecl);
}
static void addRedeclaredMethods(const ObjCMethodDecl *ObjCMethod,
SmallVectorImpl<const NamedDecl *> &Redeclared) {
const DeclContext *DC = ObjCMethod->getDeclContext();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(DC)) {
const ObjCInterfaceDecl *ID = IMD->getClassInterface();
if (!ID)
return;
// Add redeclared method here.
for (const auto *Ext : ID->known_extensions()) {
if (ObjCMethodDecl *RedeclaredMethod =
Ext->getMethod(ObjCMethod->getSelector(),
ObjCMethod->isInstanceMethod()))
Redeclared.push_back(RedeclaredMethod);
}
}
}
void ASTContext::attachCommentsToJustParsedDecls(ArrayRef<Decl *> Decls,
const Preprocessor *PP) {
if (Comments.empty() || Decls.empty())
return;
FileID File;
for (Decl *D : Decls) {
SourceLocation Loc = D->getLocation();
if (Loc.isValid()) {
// See if there are any new comments that are not attached to a decl.
// The location doesn't have to be precise - we care only about the file.
File = SourceMgr.getDecomposedLoc(Loc).first;
break;
}
}
if (File.isInvalid())
return;
auto CommentsInThisFile = Comments.getCommentsInFile(File);
if (!CommentsInThisFile || CommentsInThisFile->empty() ||
CommentsInThisFile->rbegin()->second->isAttached())
return;
// There is at least one comment not attached to a decl.
// Maybe it should be attached to one of Decls?
//
// Note that this way we pick up not only comments that precede the
// declaration, but also comments that *follow* the declaration -- thanks to
// the lookahead in the lexer: we've consumed the semicolon and looked
// ahead through comments.
for (const Decl *D : Decls) {
assert(D);
if (D->isInvalidDecl())
continue;
D = &adjustDeclToTemplate(*D);
const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr);
if (DeclLoc.isInvalid() || !DeclLoc.isFileID())
continue;
if (DeclRawComments.count(D) > 0)
continue;
if (RawComment *const DocComment =
getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) {
cacheRawCommentForDecl(*D, *DocComment);
comments::FullComment *FC = DocComment->parse(*this, PP, D);
ParsedComments[D->getCanonicalDecl()] = FC;
}
}
}
comments::FullComment *ASTContext::cloneFullComment(comments::FullComment *FC,
const Decl *D) const {
auto *ThisDeclInfo = new (*this) comments::DeclInfo;
ThisDeclInfo->CommentDecl = D;
ThisDeclInfo->IsFilled = false;
ThisDeclInfo->fill();
ThisDeclInfo->CommentDecl = FC->getDecl();
if (!ThisDeclInfo->TemplateParameters)
ThisDeclInfo->TemplateParameters = FC->getDeclInfo()->TemplateParameters;
comments::FullComment *CFC =
new (*this) comments::FullComment(FC->getBlocks(),
ThisDeclInfo);
return CFC;
}
comments::FullComment *ASTContext::getLocalCommentForDeclUncached(const Decl *D) const {
const RawComment *RC = getRawCommentForDeclNoCache(D);
return RC ? RC->parse(*this, nullptr, D) : nullptr;
}
comments::FullComment *ASTContext::getCommentForDecl(
const Decl *D,
const Preprocessor *PP) const {
if (!D || D->isInvalidDecl())
return nullptr;
D = &adjustDeclToTemplate(*D);
const Decl *Canonical = D->getCanonicalDecl();
llvm::DenseMap<const Decl *, comments::FullComment *>::iterator Pos =
ParsedComments.find(Canonical);
if (Pos != ParsedComments.end()) {
if (Canonical != D) {
comments::FullComment *FC = Pos->second;
comments::FullComment *CFC = cloneFullComment(FC, D);
return CFC;
}
return Pos->second;
}
const Decl *OriginalDecl = nullptr;
const RawComment *RC = getRawCommentForAnyRedecl(D, &OriginalDecl);
if (!RC) {
if (isa<ObjCMethodDecl>(D) || isa<FunctionDecl>(D)) {
SmallVector<const NamedDecl*, 8> Overridden;
const auto *OMD = dyn_cast<ObjCMethodDecl>(D);
if (OMD && OMD->isPropertyAccessor())
if (const ObjCPropertyDecl *PDecl = OMD->findPropertyDecl())
if (comments::FullComment *FC = getCommentForDecl(PDecl, PP))
return cloneFullComment(FC, D);
if (OMD)
addRedeclaredMethods(OMD, Overridden);
getOverriddenMethods(dyn_cast<NamedDecl>(D), Overridden);
for (unsigned i = 0, e = Overridden.size(); i < e; i++)
if (comments::FullComment *FC = getCommentForDecl(Overridden[i], PP))
return cloneFullComment(FC, D);
}
else if (const auto *TD = dyn_cast<TypedefNameDecl>(D)) {
// Attach any tag type's documentation to its typedef if the latter
// does not have one of its own.
QualType QT = TD->getUnderlyingType();
if (const auto *TT = QT->getAs<TagType>())
if (const Decl *TD = TT->getDecl())
if (comments::FullComment *FC = getCommentForDecl(TD, PP))
return cloneFullComment(FC, D);
}
else if (const auto *IC = dyn_cast<ObjCInterfaceDecl>(D)) {
while (IC->getSuperClass()) {
IC = IC->getSuperClass();
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
}
else if (const auto *CD = dyn_cast<ObjCCategoryDecl>(D)) {
if (const ObjCInterfaceDecl *IC = CD->getClassInterface())
if (comments::FullComment *FC = getCommentForDecl(IC, PP))
return cloneFullComment(FC, D);
}
else if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
if (!(RD = RD->getDefinition()))
return nullptr;
// Check non-virtual bases.
for (const auto &I : RD->bases()) {
if (I.isVirtual() || (I.getAccessSpecifier() != AS_public))
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *NonVirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(NonVirtualBase = NonVirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((NonVirtualBase), PP))
return cloneFullComment(FC, D);
}
}
// Check virtual bases.
for (const auto &I : RD->vbases()) {
if (I.getAccessSpecifier() != AS_public)
continue;
QualType Ty = I.getType();
if (Ty.isNull())
continue;
if (const CXXRecordDecl *VirtualBase = Ty->getAsCXXRecordDecl()) {
if (!(VirtualBase = VirtualBase->getDefinition()))
continue;
if (comments::FullComment *FC = getCommentForDecl((VirtualBase), PP))
return cloneFullComment(FC, D);
}
}
}
return nullptr;
}
// If the RawComment was attached to another redeclaration of this Decl, we
// should parse the comment in context of that other Decl. This is important
// because comments can contain references to parameter names which can be
// different across redeclarations.
if (D != OriginalDecl && OriginalDecl)
return getCommentForDecl(OriginalDecl, PP);
comments::FullComment *FC = RC->parse(*this, PP, D);
ParsedComments[Canonical] = FC;
return FC;
}
void
ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID,
const ASTContext &C,
TemplateTemplateParmDecl *Parm) {
ID.AddInteger(Parm->getDepth());
ID.AddInteger(Parm->getPosition());
ID.AddBoolean(Parm->isParameterPack());
TemplateParameterList *Params = Parm->getTemplateParameters();
ID.AddInteger(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
ID.AddInteger(0);
ID.AddBoolean(TTP->isParameterPack());
const TypeConstraint *TC = TTP->getTypeConstraint();
ID.AddBoolean(TC != nullptr);
if (TC)
TC->getImmediatelyDeclaredConstraint()->Profile(ID, C,
/*Canonical=*/true);
if (TTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(TTP->getNumExpansionParameters());
} else
ID.AddBoolean(false);
continue;
}
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
ID.AddInteger(1);
ID.AddBoolean(NTTP->isParameterPack());
ID.AddPointer(NTTP->getType().getCanonicalType().getAsOpaquePtr());
if (NTTP->isExpandedParameterPack()) {
ID.AddBoolean(true);
ID.AddInteger(NTTP->getNumExpansionTypes());
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
QualType T = NTTP->getExpansionType(I);
ID.AddPointer(T.getCanonicalType().getAsOpaquePtr());
}
} else
ID.AddBoolean(false);
continue;
}
auto *TTP = cast<TemplateTemplateParmDecl>(*P);
ID.AddInteger(2);
Profile(ID, C, TTP);
}
Expr *RequiresClause = Parm->getTemplateParameters()->getRequiresClause();
ID.AddBoolean(RequiresClause != nullptr);
if (RequiresClause)
RequiresClause->Profile(ID, C, /*Canonical=*/true);
}
static Expr *
canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC,
QualType ConstrainedType) {
// This is a bit ugly - we need to form a new immediately-declared
// constraint that references the new parameter; this would ideally
// require semantic analysis (e.g. template<C T> struct S {}; - the
// converted arguments of C<T> could be an argument pack if C is
// declared as template<typename... T> concept C = ...).
// We don't have semantic analysis here so we dig deep into the
// ready-made constraint expr and change the thing manually.
ConceptSpecializationExpr *CSE;
if (const auto *Fold = dyn_cast<CXXFoldExpr>(IDC))
CSE = cast<ConceptSpecializationExpr>(Fold->getLHS());
else
CSE = cast<ConceptSpecializationExpr>(IDC);
ArrayRef<TemplateArgument> OldConverted = CSE->getTemplateArguments();
SmallVector<TemplateArgument, 3> NewConverted;
NewConverted.reserve(OldConverted.size());
if (OldConverted.front().getKind() == TemplateArgument::Pack) {
// The case:
// template<typename... T> concept C = true;
// template<C<int> T> struct S; -> constraint is C<{T, int}>
NewConverted.push_back(ConstrainedType);
for (auto &Arg : OldConverted.front().pack_elements().drop_front(1))
NewConverted.push_back(Arg);
TemplateArgument NewPack(NewConverted);
NewConverted.clear();
NewConverted.push_back(NewPack);
assert(OldConverted.size() == 1 &&
"Template parameter pack should be the last parameter");
} else {
assert(OldConverted.front().getKind() == TemplateArgument::Type &&
"Unexpected first argument kind for immediately-declared "
"constraint");
NewConverted.push_back(ConstrainedType);
for (auto &Arg : OldConverted.drop_front(1))
NewConverted.push_back(Arg);
}
Expr *NewIDC = ConceptSpecializationExpr::Create(
C, CSE->getNamedConcept(), NewConverted, nullptr,
CSE->isInstantiationDependent(), CSE->containsUnexpandedParameterPack());
if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
NewIDC = new (C) CXXFoldExpr(
OrigFold->getType(), /*Callee*/nullptr, SourceLocation(), NewIDC,
BinaryOperatorKind::BO_LAnd, SourceLocation(), /*RHS=*/nullptr,
SourceLocation(), /*NumExpansions=*/None);
return NewIDC;
}
TemplateTemplateParmDecl *
ASTContext::getCanonicalTemplateTemplateParmDecl(
TemplateTemplateParmDecl *TTP) const {
// Check if we already have a canonical template template parameter.
llvm::FoldingSetNodeID ID;
CanonicalTemplateTemplateParm::Profile(ID, *this, TTP);
void *InsertPos = nullptr;
CanonicalTemplateTemplateParm *Canonical
= CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
if (Canonical)
return Canonical->getParam();
// Build a canonical template parameter list.
TemplateParameterList *Params = TTP->getTemplateParameters();
SmallVector<NamedDecl *, 4> CanonParams;
CanonParams.reserve(Params->size());
for (TemplateParameterList::const_iterator P = Params->begin(),
PEnd = Params->end();
P != PEnd; ++P) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
TemplateTypeParmDecl *NewTTP = TemplateTypeParmDecl::Create(*this,
getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
TTP->getDepth(), TTP->getIndex(), nullptr, false,
TTP->isParameterPack(), TTP->hasTypeConstraint(),
TTP->isExpandedParameterPack() ?
llvm::Optional<unsigned>(TTP->getNumExpansionParameters()) : None);
if (const auto *TC = TTP->getTypeConstraint()) {
QualType ParamAsArgument(NewTTP->getTypeForDecl(), 0);
Expr *NewIDC = canonicalizeImmediatelyDeclaredConstraint(
*this, TC->getImmediatelyDeclaredConstraint(),
ParamAsArgument);
TemplateArgumentListInfo CanonArgsAsWritten;
if (auto *Args = TC->getTemplateArgsAsWritten())
for (const auto &ArgLoc : Args->arguments())
CanonArgsAsWritten.addArgument(
TemplateArgumentLoc(ArgLoc.getArgument(),
TemplateArgumentLocInfo()));
NewTTP->setTypeConstraint(
NestedNameSpecifierLoc(),
DeclarationNameInfo(TC->getNamedConcept()->getDeclName(),
SourceLocation()), /*FoundDecl=*/nullptr,
// Actually canonicalizing a TemplateArgumentLoc is difficult so we
// simply omit the ArgsAsWritten
TC->getNamedConcept(), /*ArgsAsWritten=*/nullptr, NewIDC);
}
CanonParams.push_back(NewTTP);
} else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
QualType T = getCanonicalType(NTTP->getType());
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
NonTypeTemplateParmDecl *Param;
if (NTTP->isExpandedParameterPack()) {
SmallVector<QualType, 2> ExpandedTypes;
SmallVector<TypeSourceInfo *, 2> ExpandedTInfos;
for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
ExpandedTypes.push_back(getCanonicalType(NTTP->getExpansionType(I)));
ExpandedTInfos.push_back(
getTrivialTypeSourceInfo(ExpandedTypes.back()));
}
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
TInfo,
ExpandedTypes,
ExpandedTInfos);
} else {
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
SourceLocation(),
NTTP->getDepth(),
NTTP->getPosition(), nullptr,
T,
NTTP->isParameterPack(),
TInfo);
}
if (AutoType *AT = T->getContainedAutoType()) {
if (AT->isConstrained()) {
Param->setPlaceholderTypeConstraint(
canonicalizeImmediatelyDeclaredConstraint(
*this, NTTP->getPlaceholderTypeConstraint(), T));
}
}
CanonParams.push_back(Param);
} else
CanonParams.push_back(getCanonicalTemplateTemplateParmDecl(
cast<TemplateTemplateParmDecl>(*P)));
}
Expr *CanonRequiresClause = nullptr;
if (Expr *RequiresClause = TTP->getTemplateParameters()->getRequiresClause())
CanonRequiresClause = RequiresClause;
TemplateTemplateParmDecl *CanonTTP
= TemplateTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(), TTP->getDepth(),
TTP->getPosition(),
TTP->isParameterPack(),
nullptr,
TemplateParameterList::Create(*this, SourceLocation(),
SourceLocation(),
CanonParams,
SourceLocation(),
CanonRequiresClause));
// Get the new insert position for the node we care about.
Canonical = CanonTemplateTemplateParms.FindNodeOrInsertPos(ID, InsertPos);
assert(!Canonical && "Shouldn't be in the map!");
(void)Canonical;
// Create the canonical template template parameter entry.
Canonical = new (*this) CanonicalTemplateTemplateParm(CanonTTP);
CanonTemplateTemplateParms.InsertNode(Canonical, InsertPos);
return CanonTTP;
}
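// Illustrative aside, not from the upstream file: the function above uses the
// llvm::FoldingSet "profile, then find-or-insert" pattern to hand out exactly
// one canonical node per distinct key. A minimal sketch of that pattern with a
// trivial node type, relying only on llvm/ADT/FoldingSet.h (included above);
// the node and function names are made up, and ownership is left to the
// caller (the real code allocates from the ASTContext arena).
namespace folding_set_sketch {
struct CanonicalPair : llvm::FoldingSetNode {
  unsigned Depth, Index;
  CanonicalPair(unsigned D, unsigned I) : Depth(D), Index(I) {}
  void Profile(llvm::FoldingSetNodeID &ID) {
    ID.AddInteger(Depth);
    ID.AddInteger(Index);
  }
};
inline CanonicalPair *getOrCreate(llvm::FoldingSet<CanonicalPair> &Set,
                                  unsigned Depth, unsigned Index) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(Depth);
  ID.AddInteger(Index);
  void *InsertPos = nullptr;
  if (CanonicalPair *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
    return Existing; // already canonicalized
  auto *Fresh = new CanonicalPair(Depth, Index);
  Set.InsertNode(Fresh, InsertPos);
  return Fresh;
}
} // namespace folding_set_sketch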
TargetCXXABI::Kind ASTContext::getCXXABIKind() const {
auto Kind = getTargetInfo().getCXXABI().getKind();
return getLangOpts().CXXABI.getValueOr(Kind);
}
CXXABI *ASTContext::createCXXABI(const TargetInfo &T) {
if (!LangOpts.CPlusPlus) return nullptr;
switch (getCXXABIKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericARM: // Same as Itanium at this level
case TargetCXXABI::iOS:
case TargetCXXABI::WatchOS:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::XL:
return CreateItaniumCXXABI(*this);
case TargetCXXABI::Microsoft:
return CreateMicrosoftCXXABI(*this);
}
llvm_unreachable("Invalid CXXABI type!");
}
interp::Context &ASTContext::getInterpContext() {
if (!InterpContext) {
InterpContext.reset(new interp::Context(*this));
}
return *InterpContext.get();
}
ParentMapContext &ASTContext::getParentMapContext() {
if (!ParentMapCtx)
ParentMapCtx.reset(new ParentMapContext(*this));
return *ParentMapCtx.get();
}
static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
const LangOptions &LOpts) {
if (LOpts.FakeAddressSpaceMap) {
// The fake address space map must have a distinct entry for each
// language-specific address space.
static const unsigned FakeAddrSpaceMap[] = {
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
7, // cuda_device
8, // cuda_constant
9, // cuda_shared
1, // sycl_global
5, // sycl_global_device
6, // sycl_global_host
3, // sycl_local
0, // sycl_private
10, // ptr32_sptr
11, // ptr32_uptr
12 // ptr64
};
return &FakeAddrSpaceMap;
} else {
return &T.getAddressSpaceMap();
}
}
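// Illustrative aside, not from the upstream file: a LangASMap is simply a
// table indexed by the language-level address space, yielding the target
// address-space number used for pointer qualification and mangling. A toy
// stand-in showing the indexing, with made-up enumerators and values:
namespace addr_space_map_sketch {
enum class ToyLangAS : unsigned { Default = 0, Global = 1, Local = 2, Constant = 3 };
static const unsigned ToyTargetASMap[] = {0, 1, 3, 2}; // one slot per ToyLangAS
inline unsigned toTargetAS(ToyLangAS AS) {
  return ToyTargetASMap[static_cast<unsigned>(AS)];
}
} // namespace addr_space_map_sketch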
static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI,
const LangOptions &LangOpts) {
switch (LangOpts.getAddressSpaceMapMangling()) {
case LangOptions::ASMM_Target:
return TI.useAddressSpaceMapMangling();
case LangOptions::ASMM_On:
return true;
case LangOptions::ASMM_Off:
return false;
}
llvm_unreachable("getAddressSpaceMapMangling() doesn't cover anything.");
}
ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
IdentifierTable &idents, SelectorTable &sels,
Builtin::Context &builtins, TranslationUnitKind TUKind)
: ConstantArrayTypes(this_()), FunctionProtoTypes(this_()),
TemplateSpecializationTypes(this_()),
DependentTemplateSpecializationTypes(this_()), AutoTypes(this_()),
SubstTemplateTemplateParmPacks(this_()),
CanonTemplateTemplateParms(this_()), SourceMgr(SM), LangOpts(LOpts),
NoSanitizeL(new NoSanitizeList(LangOpts.NoSanitizeFiles, SM)),
XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles,
LangOpts.XRayNeverInstrumentFiles,
LangOpts.XRayAttrListFiles, SM)),
ProfList(new ProfileList(LangOpts.ProfileListFiles, SM)),
PrintingPolicy(LOpts), Idents(idents), Selectors(sels),
BuiltinInfo(builtins), TUKind(TUKind), DeclarationNames(*this),
Comments(SM), CommentCommandTraits(BumpAlloc, LOpts.CommentOpts),
CompCategories(this_()), LastSDM(nullptr, 0) {
addTranslationUnitDecl();
}
ASTContext::~ASTContext() {
// Release the DenseMaps associated with DeclContext objects.
// FIXME: Is this the ideal solution?
ReleaseDeclContextMaps();
// Call all of the deallocation functions on all of their targets.
for (auto &Pair : Deallocations)
(Pair.first)(Pair.second);
// ASTRecordLayout objects in ASTRecordLayouts must always be destroyed
// because they can contain DenseMaps.
for (llvm::DenseMap<const ObjCContainerDecl*,
const ASTRecordLayout*>::iterator
I = ObjCLayouts.begin(), E = ObjCLayouts.end(); I != E; )
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
for (llvm::DenseMap<const RecordDecl*, const ASTRecordLayout*>::iterator
I = ASTRecordLayouts.begin(), E = ASTRecordLayouts.end(); I != E; ) {
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
}
for (llvm::DenseMap<const Decl*, AttrVec*>::iterator A = DeclAttrs.begin(),
AEnd = DeclAttrs.end();
A != AEnd; ++A)
A->second->~AttrVec();
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
}
void ASTContext::setTraversalScope(const std::vector<Decl *> &TopLevelDecls) {
TraversalScope = TopLevelDecls;
getParentMapContext().clear();
}
void ASTContext::AddDeallocation(void (*Callback)(void *), void *Data) const {
Deallocations.push_back({Callback, Data});
}
void
ASTContext::setExternalSource(IntrusiveRefCntPtr<ExternalASTSource> Source) {
ExternalSource = std::move(Source);
}
void ASTContext::PrintStats() const {
llvm::errs() << "\n*** AST Context Stats:\n";
llvm::errs() << " " << Types.size() << " types total.\n";
unsigned counts[] = {
#define TYPE(Name, Parent) 0,
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
0 // Extra
};
for (unsigned i = 0, e = Types.size(); i != e; ++i) {
Type *T = Types[i];
counts[(unsigned)T->getTypeClass()]++;
}
unsigned Idx = 0;
unsigned TotalBytes = 0;
#define TYPE(Name, Parent) \
if (counts[Idx]) \
llvm::errs() << " " << counts[Idx] << " " << #Name \
<< " types, " << sizeof(Name##Type) << " each " \
<< "(" << counts[Idx] * sizeof(Name##Type) \
<< " bytes)\n"; \
TotalBytes += counts[Idx] * sizeof(Name##Type); \
++Idx;
#define ABSTRACT_TYPE(Name, Parent)
#include "clang/AST/TypeNodes.inc"
llvm::errs() << "Total bytes = " << TotalBytes << "\n";
// Implicit special member functions.
llvm::errs() << NumImplicitDefaultConstructorsDeclared << "/"
<< NumImplicitDefaultConstructors
<< " implicit default constructors created\n";
llvm::errs() << NumImplicitCopyConstructorsDeclared << "/"
<< NumImplicitCopyConstructors
<< " implicit copy constructors created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveConstructorsDeclared << "/"
<< NumImplicitMoveConstructors
<< " implicit move constructors created\n";
llvm::errs() << NumImplicitCopyAssignmentOperatorsDeclared << "/"
<< NumImplicitCopyAssignmentOperators
<< " implicit copy assignment operators created\n";
if (getLangOpts().CPlusPlus)
llvm::errs() << NumImplicitMoveAssignmentOperatorsDeclared << "/"
<< NumImplicitMoveAssignmentOperators
<< " implicit move assignment operators created\n";
llvm::errs() << NumImplicitDestructorsDeclared << "/"
<< NumImplicitDestructors
<< " implicit destructors created\n";
if (ExternalSource) {
llvm::errs() << "\n";
ExternalSource->PrintStats();
}
BumpAlloc.PrintStats();
}
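// Illustrative aside, not from the upstream file: the statistics above are
// generated with the X-macro pattern -- TypeNodes.inc expands one TYPE(...)
// entry per type class, and redefining TYPE before each #include turns the
// same list first into array initializers and then into printing code. A
// self-contained miniature of the pattern with a made-up node list:
#define TOY_NODE_LIST(X) X(Builtin) X(Pointer) X(Record)
namespace x_macro_sketch {
enum ToyKind {
#define TOY_NODE(Name) Toy##Name,
  TOY_NODE_LIST(TOY_NODE)
#undef TOY_NODE
  ToyKindCount
};
static const char *const ToyKindNames[] = {
#define TOY_NODE(Name) #Name,
  TOY_NODE_LIST(TOY_NODE)
#undef TOY_NODE
};
inline const char *toyKindName(ToyKind K) { return ToyKindNames[K]; }
} // namespace x_macro_sketch
#undef TOY_NODE_LIST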
void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
bool NotifyListeners) {
if (NotifyListeners)
if (auto *Listener = getASTMutationListener())
Listener->RedefinedHiddenDefinition(ND, M);
MergedDefModules[cast<NamedDecl>(ND->getCanonicalDecl())].push_back(M);
}
void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
auto It = MergedDefModules.find(cast<NamedDecl>(ND->getCanonicalDecl()));
if (It == MergedDefModules.end())
return;
auto &Merged = It->second;
llvm::DenseSet<Module*> Found;
for (Module *&M : Merged)
if (!Found.insert(M).second)
M = nullptr;
Merged.erase(std::remove(Merged.begin(), Merged.end(), nullptr), Merged.end());
}
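// Illustrative aside, not from the upstream file: the function above removes
// duplicates while preserving the first occurrence of each module by nulling
// repeats and then compacting with the erase(remove(...)) idiom. The same
// order-preserving dedupe over plain pointers, using containers already
// included in this file (SmallVector, DenseSet, <algorithm>); the name below
// is made up.
namespace dedupe_sketch {
inline void dedupePreservingOrder(llvm::SmallVectorImpl<int *> &Items) {
  llvm::DenseSet<int *> Seen;
  for (int *&P : Items)
    if (!Seen.insert(P).second)
      P = nullptr; // mark later duplicates
  Items.erase(std::remove(Items.begin(), Items.end(), nullptr), Items.end());
}
} // namespace dedupe_sketch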
ArrayRef<Module *>
ASTContext::getModulesWithMergedDefinition(const NamedDecl *Def) {
auto MergedIt =
MergedDefModules.find(cast<NamedDecl>(Def->getCanonicalDecl()));
if (MergedIt == MergedDefModules.end())
return None;
return MergedIt->second;
}
void ASTContext::PerModuleInitializers::resolve(ASTContext &Ctx) {
if (LazyInitializers.empty())
return;
auto *Source = Ctx.getExternalSource();
assert(Source && "lazy initializers but no external source");
auto LazyInits = std::move(LazyInitializers);
LazyInitializers.clear();
for (auto ID : LazyInits)
Initializers.push_back(Source->GetExternalDecl(ID));
assert(LazyInitializers.empty() &&
"GetExternalDecl for lazy module initializer added more inits");
}
void ASTContext::addModuleInitializer(Module *M, Decl *D) {
// One special case: if we add a module initializer that imports another
// module, and that module's only initializer is an ImportDecl, simplify.
if (const auto *ID = dyn_cast<ImportDecl>(D)) {
auto It = ModuleInitializers.find(ID->getImportedModule());
// Maybe the ImportDecl does nothing at all. (Common case.)
if (It == ModuleInitializers.end())
return;
// Maybe the ImportDecl only imports another ImportDecl.
auto &Imported = *It->second;
if (Imported.Initializers.size() + Imported.LazyInitializers.size() == 1) {
Imported.resolve(*this);
auto *OnlyDecl = Imported.Initializers.front();
if (isa<ImportDecl>(OnlyDecl))
D = OnlyDecl;
}
}
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->Initializers.push_back(D);
}
void ASTContext::addLazyModuleInitializers(Module *M, ArrayRef<uint32_t> IDs) {
auto *&Inits = ModuleInitializers[M];
if (!Inits)
Inits = new (*this) PerModuleInitializers;
Inits->LazyInitializers.insert(Inits->LazyInitializers.end(),
IDs.begin(), IDs.end());
}
ArrayRef<Decl *> ASTContext::getModuleInitializers(Module *M) {
auto It = ModuleInitializers.find(M);
if (It == ModuleInitializers.end())
return None;
auto *Inits = It->second;
Inits->resolve(*this);
return Inits->Initializers;
}
ExternCContextDecl *ASTContext::getExternCContextDecl() const {
if (!ExternCContext)
ExternCContext = ExternCContextDecl::Create(*this, getTranslationUnitDecl());
return ExternCContext;
}
BuiltinTemplateDecl *
ASTContext::buildBuiltinTemplateDecl(BuiltinTemplateKind BTK,
const IdentifierInfo *II) const {
auto *BuiltinTemplate =
BuiltinTemplateDecl::Create(*this, getTranslationUnitDecl(), II, BTK);
BuiltinTemplate->setImplicit();
getTranslationUnitDecl()->addDecl(BuiltinTemplate);
return BuiltinTemplate;
}
BuiltinTemplateDecl *
ASTContext::getMakeIntegerSeqDecl() const {
if (!MakeIntegerSeqDecl)
MakeIntegerSeqDecl = buildBuiltinTemplateDecl(BTK__make_integer_seq,
getMakeIntegerSeqName());
return MakeIntegerSeqDecl;
}
BuiltinTemplateDecl *
ASTContext::getTypePackElementDecl() const {
if (!TypePackElementDecl)
TypePackElementDecl = buildBuiltinTemplateDecl(BTK__type_pack_element,
getTypePackElementName());
return TypePackElementDecl;
}
RecordDecl *ASTContext::buildImplicitRecord(StringRef Name,
RecordDecl::TagKind TK) const {
SourceLocation Loc;
RecordDecl *NewDecl;
if (getLangOpts().CPlusPlus)
NewDecl = CXXRecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc,
Loc, &Idents.get(Name));
else
NewDecl = RecordDecl::Create(*this, TK, getTranslationUnitDecl(), Loc, Loc,
&Idents.get(Name));
NewDecl->setImplicit();
NewDecl->addAttr(TypeVisibilityAttr::CreateImplicit(
const_cast<ASTContext &>(*this), TypeVisibilityAttr::Default));
return NewDecl;
}
TypedefDecl *ASTContext::buildImplicitTypedef(QualType T,
StringRef Name) const {
TypeSourceInfo *TInfo = getTrivialTypeSourceInfo(T);
TypedefDecl *NewDecl = TypedefDecl::Create(
const_cast<ASTContext &>(*this), getTranslationUnitDecl(),
SourceLocation(), SourceLocation(), &Idents.get(Name), TInfo);
NewDecl->setImplicit();
return NewDecl;
}
TypedefDecl *ASTContext::getInt128Decl() const {
if (!Int128Decl)
Int128Decl = buildImplicitTypedef(Int128Ty, "__int128_t");
return Int128Decl;
}
TypedefDecl *ASTContext::getUInt128Decl() const {
if (!UInt128Decl)
UInt128Decl = buildImplicitTypedef(UnsignedInt128Ty, "__uint128_t");
return UInt128Decl;
}
void ASTContext::InitBuiltinType(CanQualType &R, BuiltinType::Kind K) {
auto *Ty = new (*this, TypeAlignment) BuiltinType(K);
R = CanQualType::CreateUnsafe(QualType(Ty, 0));
Types.push_back(Ty);
}
void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
assert((!this->Target || this->Target == &Target) &&
"Incorrect target reinitialization");
assert(VoidTy.isNull() && "Context reinitialized?");
this->Target = &Target;
this->AuxTarget = AuxTarget;
ABI.reset(createCXXABI(Target));
AddrSpaceMap = getAddressSpaceMap(Target, LangOpts);
AddrSpaceMapMangling = isAddrSpaceMapManglingEnabled(Target, LangOpts);
// C99 6.2.5p19.
InitBuiltinType(VoidTy, BuiltinType::Void);
// C99 6.2.5p2.
InitBuiltinType(BoolTy, BuiltinType::Bool);
// C99 6.2.5p3.
if (LangOpts.CharIsSigned)
InitBuiltinType(CharTy, BuiltinType::Char_S);
else
InitBuiltinType(CharTy, BuiltinType::Char_U);
// C99 6.2.5p4.
InitBuiltinType(SignedCharTy, BuiltinType::SChar);
InitBuiltinType(ShortTy, BuiltinType::Short);
InitBuiltinType(IntTy, BuiltinType::Int);
InitBuiltinType(LongTy, BuiltinType::Long);
InitBuiltinType(LongLongTy, BuiltinType::LongLong);
// C99 6.2.5p6.
InitBuiltinType(UnsignedCharTy, BuiltinType::UChar);
InitBuiltinType(UnsignedShortTy, BuiltinType::UShort);
InitBuiltinType(UnsignedIntTy, BuiltinType::UInt);
InitBuiltinType(UnsignedLongTy, BuiltinType::ULong);
InitBuiltinType(UnsignedLongLongTy, BuiltinType::ULongLong);
// C99 6.2.5p10.
InitBuiltinType(FloatTy, BuiltinType::Float);
InitBuiltinType(DoubleTy, BuiltinType::Double);
InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble);
// GNU extension, __float128 for IEEE quadruple precision
InitBuiltinType(Float128Ty, BuiltinType::Float128);
// C11 extension ISO/IEC TS 18661-3
InitBuiltinType(Float16Ty, BuiltinType::Float16);
// ISO/IEC JTC1 SC22 WG14 N1169 Extension
InitBuiltinType(ShortAccumTy, BuiltinType::ShortAccum);
InitBuiltinType(AccumTy, BuiltinType::Accum);
InitBuiltinType(LongAccumTy, BuiltinType::LongAccum);
InitBuiltinType(UnsignedShortAccumTy, BuiltinType::UShortAccum);
InitBuiltinType(UnsignedAccumTy, BuiltinType::UAccum);
InitBuiltinType(UnsignedLongAccumTy, BuiltinType::ULongAccum);
InitBuiltinType(ShortFractTy, BuiltinType::ShortFract);
InitBuiltinType(FractTy, BuiltinType::Fract);
InitBuiltinType(LongFractTy, BuiltinType::LongFract);
InitBuiltinType(UnsignedShortFractTy, BuiltinType::UShortFract);
InitBuiltinType(UnsignedFractTy, BuiltinType::UFract);
InitBuiltinType(UnsignedLongFractTy, BuiltinType::ULongFract);
InitBuiltinType(SatShortAccumTy, BuiltinType::SatShortAccum);
InitBuiltinType(SatAccumTy, BuiltinType::SatAccum);
InitBuiltinType(SatLongAccumTy, BuiltinType::SatLongAccum);
InitBuiltinType(SatUnsignedShortAccumTy, BuiltinType::SatUShortAccum);
InitBuiltinType(SatUnsignedAccumTy, BuiltinType::SatUAccum);
InitBuiltinType(SatUnsignedLongAccumTy, BuiltinType::SatULongAccum);
InitBuiltinType(SatShortFractTy, BuiltinType::SatShortFract);
InitBuiltinType(SatFractTy, BuiltinType::SatFract);
InitBuiltinType(SatLongFractTy, BuiltinType::SatLongFract);
InitBuiltinType(SatUnsignedShortFractTy, BuiltinType::SatUShortFract);
InitBuiltinType(SatUnsignedFractTy, BuiltinType::SatUFract);
InitBuiltinType(SatUnsignedLongFractTy, BuiltinType::SatULongFract);
// GNU extension, 128-bit integers.
InitBuiltinType(Int128Ty, BuiltinType::Int128);
InitBuiltinType(UnsignedInt128Ty, BuiltinType::UInt128);
// C++ 3.9.1p5
if (TargetInfo::isTypeSigned(Target.getWCharType()))
InitBuiltinType(WCharTy, BuiltinType::WChar_S);
else // -fshort-wchar makes wchar_t be unsigned.
InitBuiltinType(WCharTy, BuiltinType::WChar_U);
if (LangOpts.CPlusPlus && LangOpts.WChar)
WideCharTy = WCharTy;
else {
// C99 (or C++ using -fno-wchar).
WideCharTy = getFromTargetType(Target.getWCharType());
}
WIntTy = getFromTargetType(Target.getWIntType());
// C++20 (proposed)
InitBuiltinType(Char8Ty, BuiltinType::Char8);
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char16Ty, BuiltinType::Char16);
else // C99
Char16Ty = getFromTargetType(Target.getChar16Type());
if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
InitBuiltinType(Char32Ty, BuiltinType::Char32);
else // C99
Char32Ty = getFromTargetType(Target.getChar32Type());
// Placeholder type for type-dependent expressions whose type is
// completely unknown. No code should ever check a type against
// DependentTy and users should never see it; however, it is here to
// help diagnose failures to properly check for type-dependent
// expressions.
InitBuiltinType(DependentTy, BuiltinType::Dependent);
// Placeholder type for functions.
InitBuiltinType(OverloadTy, BuiltinType::Overload);
// Placeholder type for bound members.
InitBuiltinType(BoundMemberTy, BuiltinType::BoundMember);
// Placeholder type for pseudo-objects.
InitBuiltinType(PseudoObjectTy, BuiltinType::PseudoObject);
// "any" type; useful for debugger-like clients.
InitBuiltinType(UnknownAnyTy, BuiltinType::UnknownAny);
// Placeholder type for unbridged ARC casts.
InitBuiltinType(ARCUnbridgedCastTy, BuiltinType::ARCUnbridgedCast);
// Placeholder type for builtin functions.
InitBuiltinType(BuiltinFnTy, BuiltinType::BuiltinFn);
// Placeholder type for OMP array sections.
if (LangOpts.OpenMP) {
InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection);
InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping);
InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator);
}
if (LangOpts.MatrixTypes)
InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
// C99 6.2.5p11.
FloatComplexTy = getComplexType(FloatTy);
DoubleComplexTy = getComplexType(DoubleTy);
LongDoubleComplexTy = getComplexType(LongDoubleTy);
Float128ComplexTy = getComplexType(Float128Ty);
// Builtin types for 'id', 'Class', and 'SEL'.
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId);
InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass);
InitBuiltinType(ObjCBuiltinSelTy, BuiltinType::ObjCSel);
if (LangOpts.OpenCL) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/OpenCLImageTypes.def"
InitBuiltinType(OCLSamplerTy, BuiltinType::OCLSampler);
InitBuiltinType(OCLEventTy, BuiltinType::OCLEvent);
InitBuiltinType(OCLClkEventTy, BuiltinType::OCLClkEvent);
InitBuiltinType(OCLQueueTy, BuiltinType::OCLQueue);
InitBuiltinType(OCLReserveIDTy, BuiltinType::OCLReserveID);
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/OpenCLExtensionTypes.def"
}
if (Target.hasAArch64SVETypes()) {
#define SVE_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/AArch64SVEACLETypes.def"
}
if (Target.getTriple().isPPC64() &&
Target.hasFeature("paired-vector-memops")) {
if (Target.hasFeature("mma")) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
}
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
}
if (Target.hasRISCVVTypes()) {
#define RVV_TYPE(Name, Id, SingletonId) \
InitBuiltinType(SingletonId, BuiltinType::Id);
#include "clang/Basic/RISCVVTypes.def"
}
// Builtin type for __objc_yes and __objc_no
ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ?
SignedCharTy : BoolTy);
ObjCConstantStringType = QualType();
ObjCSuperType = QualType();
// void * type
if (LangOpts.OpenCLGenericAddressSpace) {
auto Q = VoidTy.getQualifiers();
Q.setAddressSpace(LangAS::opencl_generic);
VoidPtrTy = getPointerType(getCanonicalType(
getQualifiedType(VoidTy.getUnqualifiedType(), Q)));
} else {
VoidPtrTy = getPointerType(VoidTy);
}
// nullptr type (C++0x 2.14.7)
InitBuiltinType(NullPtrTy, BuiltinType::NullPtr);
// half type (OpenCL 6.1.1.1) / ARM NEON __fp16
InitBuiltinType(HalfTy, BuiltinType::Half);
InitBuiltinType(BFloat16Ty, BuiltinType::BFloat16);
// Builtin type used to help define __builtin_va_list.
VaListTagDecl = nullptr;
// MSVC predeclares struct _GUID, and we need it to create MSGuidDecls.
if (LangOpts.MicrosoftExt || LangOpts.Borland) {
MSGuidTagDecl = buildImplicitRecord("_GUID");
getTranslationUnitDecl()->addDecl(MSGuidTagDecl);
}
}
DiagnosticsEngine &ASTContext::getDiagnostics() const {
return SourceMgr.getDiagnostics();
}
AttrVec& ASTContext::getDeclAttrs(const Decl *D) {
AttrVec *&Result = DeclAttrs[D];
if (!Result) {
void *Mem = Allocate(sizeof(AttrVec));
Result = new (Mem) AttrVec;
}
return *Result;
}
/// Erase the attributes corresponding to the given declaration.
void ASTContext::eraseDeclAttrs(const Decl *D) {
llvm::DenseMap<const Decl*, AttrVec*>::iterator Pos = DeclAttrs.find(D);
if (Pos != DeclAttrs.end()) {
Pos->second->~AttrVec();
DeclAttrs.erase(Pos);
}
}
// FIXME: Remove ?
MemberSpecializationInfo *
ASTContext::getInstantiatedFromStaticDataMember(const VarDecl *Var) {
assert(Var->isStaticDataMember() && "Not a static data member");
return getTemplateOrSpecializationInfo(Var)
.dyn_cast<MemberSpecializationInfo *>();
}
ASTContext::TemplateOrSpecializationInfo
ASTContext::getTemplateOrSpecializationInfo(const VarDecl *Var) {
llvm::DenseMap<const VarDecl *, TemplateOrSpecializationInfo>::iterator Pos =
TemplateOrInstantiation.find(Var);
if (Pos == TemplateOrInstantiation.end())
return {};
return Pos->second;
}
void
ASTContext::setInstantiatedFromStaticDataMember(VarDecl *Inst, VarDecl *Tmpl,
TemplateSpecializationKind TSK,
SourceLocation PointOfInstantiation) {
assert(Inst->isStaticDataMember() && "Not a static data member");
assert(Tmpl->isStaticDataMember() && "Not a static data member");
setTemplateOrSpecializationInfo(Inst, new (*this) MemberSpecializationInfo(
Tmpl, TSK, PointOfInstantiation));
}
void
ASTContext::setTemplateOrSpecializationInfo(VarDecl *Inst,
TemplateOrSpecializationInfo TSI) {
assert(!TemplateOrInstantiation[Inst] &&
"Already noted what the variable was instantiated from");
TemplateOrInstantiation[Inst] = TSI;
}
NamedDecl *
ASTContext::getInstantiatedFromUsingDecl(NamedDecl *UUD) {
auto Pos = InstantiatedFromUsingDecl.find(UUD);
if (Pos == InstantiatedFromUsingDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingDecl(NamedDecl *Inst, NamedDecl *Pattern) {
assert((isa<UsingDecl>(Pattern) ||
isa<UnresolvedUsingValueDecl>(Pattern) ||
isa<UnresolvedUsingTypenameDecl>(Pattern)) &&
"pattern decl is not a using decl");
assert((isa<UsingDecl>(Inst) ||
isa<UnresolvedUsingValueDecl>(Inst) ||
isa<UnresolvedUsingTypenameDecl>(Inst)) &&
"instantiation did not produce a using decl");
assert(!InstantiatedFromUsingDecl[Inst] && "pattern already exists");
InstantiatedFromUsingDecl[Inst] = Pattern;
}
UsingEnumDecl *
ASTContext::getInstantiatedFromUsingEnumDecl(UsingEnumDecl *UUD) {
auto Pos = InstantiatedFromUsingEnumDecl.find(UUD);
if (Pos == InstantiatedFromUsingEnumDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUsingEnumDecl(UsingEnumDecl *Inst,
UsingEnumDecl *Pattern) {
assert(!InstantiatedFromUsingEnumDecl[Inst] && "pattern already exists");
InstantiatedFromUsingEnumDecl[Inst] = Pattern;
}
UsingShadowDecl *
ASTContext::getInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst) {
llvm::DenseMap<UsingShadowDecl*, UsingShadowDecl*>::const_iterator Pos
= InstantiatedFromUsingShadowDecl.find(Inst);
if (Pos == InstantiatedFromUsingShadowDecl.end())
return nullptr;
return Pos->second;
}
void
ASTContext::setInstantiatedFromUsingShadowDecl(UsingShadowDecl *Inst,
UsingShadowDecl *Pattern) {
assert(!InstantiatedFromUsingShadowDecl[Inst] && "pattern already exists");
InstantiatedFromUsingShadowDecl[Inst] = Pattern;
}
FieldDecl *ASTContext::getInstantiatedFromUnnamedFieldDecl(FieldDecl *Field) {
llvm::DenseMap<FieldDecl *, FieldDecl *>::iterator Pos
= InstantiatedFromUnnamedFieldDecl.find(Field);
if (Pos == InstantiatedFromUnnamedFieldDecl.end())
return nullptr;
return Pos->second;
}
void ASTContext::setInstantiatedFromUnnamedFieldDecl(FieldDecl *Inst,
FieldDecl *Tmpl) {
assert(!Inst->getDeclName() && "Instantiated field decl is not unnamed");
assert(!Tmpl->getDeclName() && "Template field decl is not unnamed");
assert(!InstantiatedFromUnnamedFieldDecl[Inst] &&
"Already noted what unnamed field was instantiated from");
InstantiatedFromUnnamedFieldDecl[Inst] = Tmpl;
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_begin(const CXXMethodDecl *Method) const {
return overridden_methods(Method).begin();
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_end(const CXXMethodDecl *Method) const {
return overridden_methods(Method).end();
}
unsigned
ASTContext::overridden_methods_size(const CXXMethodDecl *Method) const {
auto Range = overridden_methods(Method);
return Range.end() - Range.begin();
}
ASTContext::overridden_method_range
ASTContext::overridden_methods(const CXXMethodDecl *Method) const {
llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return overridden_method_range(nullptr, nullptr);
return overridden_method_range(Pos->second.begin(), Pos->second.end());
}
void ASTContext::addOverriddenMethod(const CXXMethodDecl *Method,
const CXXMethodDecl *Overridden) {
assert(Method->isCanonicalDecl() && Overridden->isCanonicalDecl());
OverriddenMethods[Method].push_back(Overridden);
}
void ASTContext::getOverriddenMethods(
const NamedDecl *D,
SmallVectorImpl<const NamedDecl *> &Overridden) const {
assert(D);
if (const auto *CXXMethod = dyn_cast<CXXMethodDecl>(D)) {
Overridden.append(overridden_methods_begin(CXXMethod),
overridden_methods_end(CXXMethod));
return;
}
const auto *Method = dyn_cast<ObjCMethodDecl>(D);
if (!Method)
return;
SmallVector<const ObjCMethodDecl *, 8> OverDecls;
Method->getOverriddenMethods(OverDecls);
Overridden.append(OverDecls.begin(), OverDecls.end());
}
void ASTContext::addedLocalImportDecl(ImportDecl *Import) {
assert(!Import->getNextLocalImport() &&
"Import declaration already in the chain");
assert(!Import->isFromASTFile() && "Non-local import declaration");
if (!FirstLocalImport) {
FirstLocalImport = Import;
LastLocalImport = Import;
return;
}
LastLocalImport->setNextLocalImport(Import);
LastLocalImport = Import;
}
//===----------------------------------------------------------------------===//
// Type Sizing and Analysis
//===----------------------------------------------------------------------===//
/// getFloatTypeSemantics - Return the APFloat 'semantics' for the specified
/// scalar floating point type.
const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const {
switch (T->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a floating point type!");
case BuiltinType::BFloat16:
return Target->getBFloat16Format();
case BuiltinType::Float16:
case BuiltinType::Half:
return Target->getHalfFormat();
case BuiltinType::Float: return Target->getFloatFormat();
case BuiltinType::Double: return Target->getDoubleFormat();
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getLongDoubleFormat();
return Target->getLongDoubleFormat();
case BuiltinType::Float128:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getFloat128Format();
return Target->getFloat128Format();
}
}
CharUnits ASTContext::getDeclAlign(const Decl *D, bool ForAlignof) const {
unsigned Align = Target->getCharWidth();
bool UseAlignAttrOnly = false;
if (unsigned AlignFromAttr = D->getMaxAlignment()) {
Align = AlignFromAttr;
// __attribute__((aligned)) can increase or decrease alignment
// *except* on a struct or struct member, where it only increases
// alignment unless 'packed' is also specified.
//
// It is an error for alignas to decrease alignment, so we can
// ignore that possibility; Sema should diagnose it.
if (isa<FieldDecl>(D)) {
UseAlignAttrOnly = D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
} else {
UseAlignAttrOnly = true;
}
}
else if (isa<FieldDecl>(D))
UseAlignAttrOnly =
D->hasAttr<PackedAttr>() ||
cast<FieldDecl>(D)->getParent()->hasAttr<PackedAttr>();
// If we're using the align attribute only, just ignore everything
// else about the declaration and its type.
if (UseAlignAttrOnly) {
// do nothing
} else if (const auto *VD = dyn_cast<ValueDecl>(D)) {
QualType T = VD->getType();
if (const auto *RT = T->getAs<ReferenceType>()) {
if (ForAlignof)
T = RT->getPointeeType();
else
T = getPointerType(RT->getPointeeType());
}
QualType BaseT = getBaseElementType(T);
if (T->isFunctionType())
Align = getTypeInfoImpl(T.getTypePtr()).Align;
else if (!BaseT->isIncompleteType()) {
// Adjust alignments of declarations with array type by the
// large-array alignment on the target.
if (const ArrayType *arrayType = getAsArrayType(T)) {
unsigned MinWidth = Target->getLargeArrayMinWidth();
if (!ForAlignof && MinWidth) {
if (isa<VariableArrayType>(arrayType))
Align = std::max(Align, Target->getLargeArrayAlign());
else if (isa<ConstantArrayType>(arrayType) &&
MinWidth <= getTypeSize(cast<ConstantArrayType>(arrayType)))
Align = std::max(Align, Target->getLargeArrayAlign());
}
}
Align = std::max(Align, getPreferredTypeAlign(T.getTypePtr()));
if (BaseT.getQualifiers().hasUnaligned())
Align = Target->getCharWidth();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (VD->hasGlobalStorage() && !ForAlignof) {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
Align = std::max(Align, getTargetInfo().getMinGlobalAlign(TypeSize));
}
}
}
// Fields can be subject to extra alignment constraints, like if
// the field is packed, the struct is packed, or the struct has a
// max-field-alignment constraint (#pragma pack). So calculate
// the actual alignment of the field within the struct, and then
// (as we're expected to) constrain that by the alignment of the type.
if (const auto *Field = dyn_cast<FieldDecl>(VD)) {
const RecordDecl *Parent = Field->getParent();
// We can only produce a sensible answer if the record is valid.
if (!Parent->isInvalidDecl()) {
const ASTRecordLayout &Layout = getASTRecordLayout(Parent);
// Start with the record's overall alignment.
unsigned FieldAlign = toBits(Layout.getAlignment());
// Use the GCD of that and the offset within the record.
uint64_t Offset = Layout.getFieldOffset(Field->getFieldIndex());
if (Offset > 0) {
// Alignment is always a power of 2, so the GCD will be a power of 2,
// which means we can simply take the lowest set bit of the offset
// instead of running Euclid's algorithm.
uint64_t LowBitOfOffset = Offset & (~Offset + 1);
if (LowBitOfOffset < FieldAlign)
FieldAlign = static_cast<unsigned>(LowBitOfOffset);
}
Align = std::min(Align, FieldAlign);
}
}
}
// Some targets have hard limitation on the maximum requestable alignment in
// aligned attribute for static variables.
const unsigned MaxAlignedAttr = getTargetInfo().getMaxAlignedAttribute();
const auto *VD = dyn_cast<VarDecl>(D);
if (MaxAlignedAttr && VD && VD->getStorageClass() == SC_Static)
Align = std::min(Align, MaxAlignedAttr);
return toCharUnitsFromBits(Align);
}
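// For illustration of the field-offset clamp above, assuming an 8-bit char:
// a field at bit offset 24 inside a record whose overall alignment is 64
// bits has Offset & (~Offset + 1) == 8 (the lowest set bit of 24), so only
// 8-bit alignment can be assumed for the field even though its type and the
// record are aligned more strictly.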
CharUnits ASTContext::getExnObjectAlignment() const {
return toCharUnitsFromBits(Target->getExnObjectAlignment());
}
// getTypeInfoDataSizeInChars - Return the size of a type, in
// chars. If the type is a record, its data size is returned. This is
// the size of the memcpy that's performed when assigning this type
// using a trivial copy/move assignment operator.
TypeInfoChars ASTContext::getTypeInfoDataSizeInChars(QualType T) const {
TypeInfoChars Info = getTypeInfoInChars(T);
// In C++, objects can sometimes be allocated into the tail padding
// of a base-class subobject. We decide whether that's possible
// during class layout, so here we can just trust the layout results.
if (getLangOpts().CPlusPlus) {
if (const auto *RT = T->getAs<RecordType>()) {
const ASTRecordLayout &layout = getASTRecordLayout(RT->getDecl());
Info.Width = layout.getDataSize();
}
}
return Info;
}
/// getConstantArrayInfoInChars - Performing the computation in CharUnits
/// instead of in bits prevents overflowing the uint64_t for some large arrays.
TypeInfoChars
static getConstantArrayInfoInChars(const ASTContext &Context,
const ConstantArrayType *CAT) {
TypeInfoChars EltInfo = Context.getTypeInfoInChars(CAT->getElementType());
uint64_t Size = CAT->getSize().getZExtValue();
assert((Size == 0 || static_cast<uint64_t>(EltInfo.Width.getQuantity()) <=
(uint64_t)(-1)/Size) &&
"Overflow in array type char size evaluation");
uint64_t Width = EltInfo.Width.getQuantity() * Size;
unsigned Align = EltInfo.Align.getQuantity();
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() ||
Context.getTargetInfo().getPointerWidth(0) == 64)
Width = llvm::alignTo(Width, Align);
return TypeInfoChars(CharUnits::fromQuantity(Width),
CharUnits::fromQuantity(Align),
EltInfo.AlignIsRequired);
}
TypeInfoChars ASTContext::getTypeInfoInChars(const Type *T) const {
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
return getConstantArrayInfoInChars(*this, CAT);
TypeInfo Info = getTypeInfo(T);
return TypeInfoChars(toCharUnitsFromBits(Info.Width),
toCharUnitsFromBits(Info.Align),
Info.AlignIsRequired);
}
TypeInfoChars ASTContext::getTypeInfoInChars(QualType T) const {
return getTypeInfoInChars(T.getTypePtr());
}
bool ASTContext::isAlignmentRequired(const Type *T) const {
return getTypeInfo(T).AlignIsRequired;
}
bool ASTContext::isAlignmentRequired(QualType T) const {
return isAlignmentRequired(T.getTypePtr());
}
unsigned ASTContext::getTypeAlignIfKnown(QualType T,
bool NeedsPreferredAlignment) const {
// An alignment on a typedef overrides anything else.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// If we have an (array of) complete type, we're done.
T = getBaseElementType(T);
if (!T->isIncompleteType())
return NeedsPreferredAlignment ? getPreferredTypeAlign(T) : getTypeAlign(T);
// If we had an array type, its element type might be a typedef
// type with an alignment attribute.
if (const auto *TT = T->getAs<TypedefType>())
if (unsigned Align = TT->getDecl()->getMaxAlignment())
return Align;
// Otherwise, see if the declaration of the type had an attribute.
if (const auto *TT = T->getAs<TagType>())
return TT->getDecl()->getMaxAlignment();
return 0;
}
TypeInfo ASTContext::getTypeInfo(const Type *T) const {
TypeInfoMap::iterator I = MemoizedTypeInfo.find(T);
if (I != MemoizedTypeInfo.end())
return I->second;
// This call can invalidate MemoizedTypeInfo[T], so we need a second lookup.
TypeInfo TI = getTypeInfoImpl(T);
MemoizedTypeInfo[T] = TI;
return TI;
}
/// getTypeInfoImpl - Return the size of the specified type, in bits. This
/// method does not work on incomplete types.
///
/// FIXME: Pointers into different addr spaces could have different sizes and
/// alignment requirements: getPointerInfo should take an AddrSpace, this
/// should take a QualType, &c.
TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
uint64_t Width = 0;
unsigned Align = 8;
bool AlignIsRequired = false;
unsigned AS = 0;
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) \
case Type::Class: \
assert(!T->isDependentType() && "should not see dependent types here"); \
return getTypeInfo(cast<Class##Type>(T)->desugar().getTypePtr());
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Should not see dependent types");
case Type::FunctionNoProto:
case Type::FunctionProto:
// GCC extension: alignof(function) = 32 bits
Width = 0;
Align = 32;
break;
case Type::IncompleteArray:
case Type::VariableArray:
case Type::ConstantArray: {
// Model non-constant sized arrays as size zero, but track the alignment.
uint64_t Size = 0;
if (const auto *CAT = dyn_cast<ConstantArrayType>(T))
Size = CAT->getSize().getZExtValue();
TypeInfo EltInfo = getTypeInfo(cast<ArrayType>(T)->getElementType());
assert((Size == 0 || EltInfo.Width <= (uint64_t)(-1) / Size) &&
"Overflow in array type bit size evaluation");
Width = EltInfo.Width * Size;
Align = EltInfo.Align;
AlignIsRequired = EltInfo.AlignIsRequired;
if (!getTargetInfo().getCXXABI().isMicrosoft() ||
getTargetInfo().getPointerWidth(0) == 64)
Width = llvm::alignTo(Width, Align);
break;
}
case Type::ExtVector:
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
Width = EltInfo.Width * VT->getNumElements();
Align = Width;
// If the alignment is not a power of 2, round up to the next power of 2.
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1)) {
Align = llvm::NextPowerOf2(Align);
Width = llvm::alignTo(Width, Align);
}
// Adjust the alignment based on the target max.
uint64_t TargetVectorAlign = Target->getMaxVectorAlign();
if (TargetVectorAlign && TargetVectorAlign < Align)
Align = TargetVectorAlign;
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
// Adjust the alignment for fixed-length SVE vectors. This is important
// for non-power-of-2 vector lengths.
Align = 128;
else if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
// Adjust the alignment for fixed-length SVE predicates.
Align = 16;
break;
}
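// For illustration, assuming the target imposes no smaller maximum vector
// alignment: a vector of 3 x 32-bit floats starts with Width = 96 and
// Align = 96; since 96 is not a power of 2, Align is rounded up to 128 and
// Width is padded to 128.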
case Type::ConstantMatrix: {
const auto *MT = cast<ConstantMatrixType>(T);
TypeInfo ElementInfo = getTypeInfo(MT->getElementType());
// The internal layout of a matrix value is implementation defined.
// Initially be ABI compatible with arrays with respect to alignment and
// size.
Width = ElementInfo.Width * MT->getNumRows() * MT->getNumColumns();
Align = ElementInfo.Align;
break;
}
case Type::Builtin:
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("Unknown builtin type!");
case BuiltinType::Void:
// GCC extension: alignof(void) = 8 bits.
Width = 0;
Align = 8;
break;
case BuiltinType::Bool:
Width = Target->getBoolWidth();
Align = Target->getBoolAlign();
break;
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::UChar:
case BuiltinType::SChar:
case BuiltinType::Char8:
Width = Target->getCharWidth();
Align = Target->getCharAlign();
break;
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
Width = Target->getWCharWidth();
Align = Target->getWCharAlign();
break;
case BuiltinType::Char16:
Width = Target->getChar16Width();
Align = Target->getChar16Align();
break;
case BuiltinType::Char32:
Width = Target->getChar32Width();
Align = Target->getChar32Align();
break;
case BuiltinType::UShort:
case BuiltinType::Short:
Width = Target->getShortWidth();
Align = Target->getShortAlign();
break;
case BuiltinType::UInt:
case BuiltinType::Int:
Width = Target->getIntWidth();
Align = Target->getIntAlign();
break;
case BuiltinType::ULong:
case BuiltinType::Long:
Width = Target->getLongWidth();
Align = Target->getLongAlign();
break;
case BuiltinType::ULongLong:
case BuiltinType::LongLong:
Width = Target->getLongLongWidth();
Align = Target->getLongLongAlign();
break;
case BuiltinType::Int128:
case BuiltinType::UInt128:
Width = 128;
Align = 128; // int128_t is 128-bit aligned on all targets.
break;
case BuiltinType::ShortAccum:
case BuiltinType::UShortAccum:
case BuiltinType::SatShortAccum:
case BuiltinType::SatUShortAccum:
Width = Target->getShortAccumWidth();
Align = Target->getShortAccumAlign();
break;
case BuiltinType::Accum:
case BuiltinType::UAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatUAccum:
Width = Target->getAccumWidth();
Align = Target->getAccumAlign();
break;
case BuiltinType::LongAccum:
case BuiltinType::ULongAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatULongAccum:
Width = Target->getLongAccumWidth();
Align = Target->getLongAccumAlign();
break;
case BuiltinType::ShortFract:
case BuiltinType::UShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::SatUShortFract:
Width = Target->getShortFractWidth();
Align = Target->getShortFractAlign();
break;
case BuiltinType::Fract:
case BuiltinType::UFract:
case BuiltinType::SatFract:
case BuiltinType::SatUFract:
Width = Target->getFractWidth();
Align = Target->getFractAlign();
break;
case BuiltinType::LongFract:
case BuiltinType::ULongFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatULongFract:
Width = Target->getLongFractWidth();
Align = Target->getLongFractAlign();
break;
case BuiltinType::BFloat16:
Width = Target->getBFloat16Width();
Align = Target->getBFloat16Align();
break;
case BuiltinType::Float16:
case BuiltinType::Half:
if (Target->hasFloat16Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getHalfWidth();
Align = Target->getHalfAlign();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getHalfWidth();
Align = AuxTarget->getHalfAlign();
}
break;
case BuiltinType::Float:
Width = Target->getFloatWidth();
Align = Target->getFloatAlign();
break;
case BuiltinType::Double:
Width = Target->getDoubleWidth();
Align = Target->getDoubleAlign();
break;
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
Target->getLongDoubleAlign() != AuxTarget->getLongDoubleAlign())) {
Width = AuxTarget->getLongDoubleWidth();
Align = AuxTarget->getLongDoubleAlign();
} else {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
}
break;
case BuiltinType::Float128:
if (Target->hasFloat128Type() || !getLangOpts().OpenMP ||
!getLangOpts().OpenMPIsDevice) {
Width = Target->getFloat128Width();
Align = Target->getFloat128Align();
} else {
assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
"Expected OpenMP device compilation.");
Width = AuxTarget->getFloat128Width();
Align = AuxTarget->getFloat128Align();
}
break;
case BuiltinType::NullPtr:
Width = Target->getPointerWidth(0); // C++ 3.9.1p11: sizeof(nullptr_t)
Align = Target->getPointerAlign(0); // == sizeof(void*)
break;
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
Width = Target->getPointerWidth(0);
Align = Target->getPointerAlign(0);
break;
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
AS = getTargetAddressSpace(
Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T)));
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
// The SVE types are effectively target-specific. The length of an
// SVE_VECTOR_TYPE is only known at runtime, but it is always a multiple
// of 128 bits. There is one predicate bit for each vector byte, so the
// length of an SVE_PREDICATE_TYPE is always a multiple of 16 bits.
//
// Because the length is only known at runtime, we use a dummy value
// of 0 for the static length. The alignment values are those defined
// by the Procedure Call Standard for the Arm Architecture.
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
case BuiltinType::Id: \
Width = 0; \
Align = 128; \
break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
Width = 0; \
Align = 16; \
break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id: \
Width = Size; \
Align = Size; \
break;
#include "clang/Basic/PPCTypes.def"
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, NF, IsSigned, \
IsFP) \
case BuiltinType::Id: \
Width = 0; \
Align = ElBits; \
break;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) \
case BuiltinType::Id: \
Width = 0; \
Align = 8; \
break;
#include "clang/Basic/RISCVVTypes.def"
}
break;
case Type::ObjCObjectPointer:
Width = Target->getPointerWidth(0);
Align = Target->getPointerAlign(0);
break;
case Type::BlockPointer:
AS = getTargetAddressSpace(cast<BlockPointerType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::LValueReference:
case Type::RValueReference:
// alignof and sizeof should never enter this code path here, so we go
// the pointer route.
AS = getTargetAddressSpace(cast<ReferenceType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::Pointer:
AS = getTargetAddressSpace(cast<PointerType>(T)->getPointeeType());
Width = Target->getPointerWidth(AS);
Align = Target->getPointerAlign(AS);
break;
case Type::MemberPointer: {
const auto *MPT = cast<MemberPointerType>(T);
CXXABI::MemberPointerInfo MPI = ABI->getMemberPointerInfo(MPT);
Width = MPI.Width;
Align = MPI.Align;
break;
}
case Type::Complex: {
// Complex types have the same alignment as their elements, but twice the
// size.
TypeInfo EltInfo = getTypeInfo(cast<ComplexType>(T)->getElementType());
Width = EltInfo.Width * 2;
Align = EltInfo.Align;
break;
}
case Type::ObjCObject:
return getTypeInfo(cast<ObjCObjectType>(T)->getBaseType().getTypePtr());
case Type::Adjusted:
case Type::Decayed:
return getTypeInfo(cast<AdjustedType>(T)->getAdjustedType().getTypePtr());
case Type::ObjCInterface: {
const auto *ObjCI = cast<ObjCInterfaceType>(T);
if (ObjCI->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
break;
}
case Type::ExtInt: {
const auto *EIT = cast<ExtIntType>(T);
Align =
std::min(static_cast<unsigned>(std::max(
getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))),
Target->getLongLongAlign());
Width = llvm::alignTo(EIT->getNumBits(), Align);
break;
}
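// For illustration, assuming an 8-bit char and a 64-bit-aligned long long:
// _ExtInt(24) gets Align = min(max(8, PowerOf2Ceil(24)), 64) = 32 and
// Width = alignTo(24, 32) = 32, while _ExtInt(129) gets
// Align = min(256, 64) = 64 and Width = alignTo(129, 64) = 192.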
case Type::Record:
case Type::Enum: {
const auto *TT = cast<TagType>(T);
if (TT->getDecl()->isInvalidDecl()) {
Width = 8;
Align = 8;
break;
}
if (const auto *ET = dyn_cast<EnumType>(TT)) {
const EnumDecl *ED = ET->getDecl();
TypeInfo Info =
getTypeInfo(ED->getIntegerType()->getUnqualifiedDesugaredType());
if (unsigned AttrAlign = ED->getMaxAlignment()) {
Info.Align = AttrAlign;
Info.AlignIsRequired = true;
}
return Info;
}
const auto *RT = cast<RecordType>(TT);
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
AlignIsRequired = RD->hasAttr<AlignedAttr>();
break;
}
case Type::SubstTemplateTypeParm:
return getTypeInfo(cast<SubstTemplateTypeParmType>(T)->
getReplacementType().getTypePtr());
case Type::Auto:
case Type::DeducedTemplateSpecialization: {
const auto *A = cast<DeducedType>(T);
assert(!A->getDeducedType().isNull() &&
"cannot request the size of an undeduced or dependent auto type");
return getTypeInfo(A->getDeducedType().getTypePtr());
}
case Type::Paren:
return getTypeInfo(cast<ParenType>(T)->getInnerType().getTypePtr());
case Type::MacroQualified:
return getTypeInfo(
cast<MacroQualifiedType>(T)->getUnderlyingType().getTypePtr());
case Type::ObjCTypeParam:
return getTypeInfo(cast<ObjCTypeParamType>(T)->desugar().getTypePtr());
case Type::Typedef: {
const TypedefNameDecl *Typedef = cast<TypedefType>(T)->getDecl();
TypeInfo Info = getTypeInfo(Typedef->getUnderlyingType().getTypePtr());
// If the typedef has an aligned attribute on it, it overrides any computed
// alignment we have. This violates the GCC documentation (which says that
// attribute(aligned) can only round up) but matches its implementation.
if (unsigned AttrAlign = Typedef->getMaxAlignment()) {
Align = AttrAlign;
AlignIsRequired = true;
} else {
Align = Info.Align;
AlignIsRequired = Info.AlignIsRequired;
}
Width = Info.Width;
break;
}
case Type::Elaborated:
return getTypeInfo(cast<ElaboratedType>(T)->getNamedType().getTypePtr());
case Type::Attributed:
return getTypeInfo(
cast<AttributedType>(T)->getEquivalentType().getTypePtr());
case Type::Atomic: {
// Start with the base type information.
TypeInfo Info = getTypeInfo(cast<AtomicType>(T)->getValueType());
Width = Info.Width;
Align = Info.Align;
if (!Width) {
// An otherwise zero-sized type should still generate an
// atomic operation.
Width = Target->getCharWidth();
assert(Align);
} else if (Width <= Target->getMaxAtomicPromoteWidth()) {
// If the size of the type doesn't exceed the platform's max
// atomic promotion width, make the size and alignment more
// favorable to atomic operations:
// Round the size up to a power of 2.
if (!llvm::isPowerOf2_64(Width))
Width = llvm::NextPowerOf2(Width);
// Set the alignment equal to the size.
Align = static_cast<unsigned>(Width);
}
}
break;
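// For illustration, assuming a maximum atomic promote width of 64 bits: an
// _Atomic of a 3-byte struct starts with Width = 24, which is not a power
// of 2, so it is promoted to Width = 32 with Align = 32; a 128-bit payload
// exceeds the promote width and keeps its original size and alignment.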
case Type::Pipe:
Width = Target->getPointerWidth(getTargetAddressSpace(LangAS::opencl_global));
Align = Target->getPointerAlign(getTargetAddressSpace(LangAS::opencl_global));
break;
}
assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
return TypeInfo(Width, Align, AlignIsRequired);
}
unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
UnadjustedAlignMap::iterator I = MemoizedUnadjustedAlign.find(T);
if (I != MemoizedUnadjustedAlign.end())
return I->second;
unsigned UnadjustedAlign;
if (const auto *RT = T->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else if (const auto *ObjCI = T->getAs<ObjCInterfaceType>()) {
const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
} else {
UnadjustedAlign = getTypeAlign(T->getUnqualifiedDesugaredType());
}
MemoizedUnadjustedAlign[T] = UnadjustedAlign;
return UnadjustedAlign;
}
unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
return SimdAlign;
}
/// toCharUnitsFromBits - Convert a size in bits to a size in characters.
CharUnits ASTContext::toCharUnitsFromBits(int64_t BitSize) const {
return CharUnits::fromQuantity(BitSize / getCharWidth());
}
/// toBits - Convert a size in characters to a size in bits.
int64_t ASTContext::toBits(CharUnits CharSize) const {
return CharSize.getQuantity() * getCharWidth();
}
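// For illustration, on a target with an 8-bit char, toCharUnitsFromBits(32)
// yields 4 CharUnits and toBits(CharUnits::fromQuantity(4)) yields 32.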
/// getTypeSizeInChars - Return the size of the specified type, in characters.
/// This method does not work on incomplete types.
CharUnits ASTContext::getTypeSizeInChars(QualType T) const {
return getTypeInfoInChars(T).Width;
}
CharUnits ASTContext::getTypeSizeInChars(const Type *T) const {
return getTypeInfoInChars(T).Width;
}
/// getTypeAlignInChars - Return the ABI-specified alignment of a type, in
/// characters. This method does not work on incomplete types.
CharUnits ASTContext::getTypeAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
CharUnits ASTContext::getTypeAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeAlign(T));
}
/// getTypeUnadjustedAlignInChars - Return the ABI-specified alignment of a
/// type, in characters, before alignment adjustments. This method does
/// not work on incomplete types.
CharUnits ASTContext::getTypeUnadjustedAlignInChars(QualType T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
CharUnits ASTContext::getTypeUnadjustedAlignInChars(const Type *T) const {
return toCharUnitsFromBits(getTypeUnadjustedAlign(T));
}
/// getPreferredTypeAlign - Return the "preferred" alignment of the specified
/// type for the current target, in bits. This can differ from the ABI
/// alignment when overaligning a data type is beneficial for performance
/// or for preserving backwards compatibility. (Note: despite the name,
/// the preferred alignment is ABI-impacting, and not an optimization.)
unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
TypeInfo TI = getTypeInfo(T);
unsigned ABIAlign = TI.Align;
T = T->getBaseElementTypeUnsafe();
// The preferred alignment of member pointers is that of a pointer.
if (T->isMemberPointerType())
return getPreferredTypeAlign(getPointerDiffType().getTypePtr());
if (!Target->allowsLargerPreferedTypeAlignment())
return ABIAlign;
if (const auto *RT = T->getAs<RecordType>()) {
if (TI.AlignIsRequired || RT->getDecl()->isInvalidDecl())
return ABIAlign;
unsigned PreferredAlign = static_cast<unsigned>(
toBits(getASTRecordLayout(RT->getDecl()).PreferredAlignment));
assert(PreferredAlign >= ABIAlign &&
"PreferredAlign should be at least as large as ABIAlign.");
return PreferredAlign;
}
// Double (and, for targets supporting AIX `power` alignment, long double) and
// long long should be naturally aligned (despite requiring less alignment) if
// possible.
if (const auto *CT = T->getAs<ComplexType>())
T = CT->getElementType().getTypePtr();
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType().getTypePtr();
if (T->isSpecificBuiltinType(BuiltinType::Double) ||
T->isSpecificBuiltinType(BuiltinType::LongLong) ||
T->isSpecificBuiltinType(BuiltinType::ULongLong) ||
(T->isSpecificBuiltinType(BuiltinType::LongDouble) &&
Target->defaultsToAIXPowerAlignment()))
// Don't increase the alignment if an alignment attribute was specified on a
// typedef declaration.
if (!TI.AlignIsRequired)
return std::max(ABIAlign, (unsigned)getTypeSize(T));
return ABIAlign;
}
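// For illustration, assuming a typical i386 System V target: double has a
// 32-bit ABI alignment but a 64-bit size, so its preferred alignment is
// raised to 64 bits here unless an alignment attribute made the ABI value
// required.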
/// getTargetDefaultAlignForAttributeAligned - Return the default alignment
/// for __attribute__((aligned)) on this target, to be used if no alignment
/// value is specified.
unsigned ASTContext::getTargetDefaultAlignForAttributeAligned() const {
return getTargetInfo().getDefaultAlignForAttributeAligned();
}
/// getAlignOfGlobalVar - Return the alignment in bits that should be given
/// to a global variable of the specified type.
unsigned ASTContext::getAlignOfGlobalVar(QualType T) const {
uint64_t TypeSize = getTypeSize(T.getTypePtr());
return std::max(getPreferredTypeAlign(T),
getTargetInfo().getMinGlobalAlign(TypeSize));
}
/// getAlignOfGlobalVarInChars - Return the alignment in characters that
/// should be given to a global variable of the specified type.
CharUnits ASTContext::getAlignOfGlobalVarInChars(QualType T) const {
return toCharUnitsFromBits(getAlignOfGlobalVar(T));
}
CharUnits ASTContext::getOffsetOfBaseWithVBPtr(const CXXRecordDecl *RD) const {
CharUnits Offset = CharUnits::Zero();
const ASTRecordLayout *Layout = &getASTRecordLayout(RD);
while (const CXXRecordDecl *Base = Layout->getBaseSharingVBPtr()) {
Offset += Layout->getBaseClassOffset(Base);
Layout = &getASTRecordLayout(Base);
}
return Offset;
}
CharUnits ASTContext::getMemberPointerPathAdjustment(const APValue &MP) const {
const ValueDecl *MPD = MP.getMemberPointerDecl();
CharUnits ThisAdjustment = CharUnits::Zero();
ArrayRef<const CXXRecordDecl*> Path = MP.getMemberPointerPath();
bool DerivedMember = MP.isMemberPointerToDerivedMember();
const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPD->getDeclContext());
for (unsigned I = 0, N = Path.size(); I != N; ++I) {
const CXXRecordDecl *Base = RD;
const CXXRecordDecl *Derived = Path[I];
if (DerivedMember)
std::swap(Base, Derived);
ThisAdjustment += getASTRecordLayout(Derived).getBaseClassOffset(Base);
RD = Path[I];
}
if (DerivedMember)
ThisAdjustment = -ThisAdjustment;
return ThisAdjustment;
}
/// DeepCollectObjCIvars -
/// This routine first collects all declared, but not synthesized, ivars in
/// the super class and then collects all ivars, including those synthesized,
/// for the current class. It is used for the implementation of the current
/// class, where all ivars, declared and synthesized, are known.
void ASTContext::DeepCollectObjCIvars(const ObjCInterfaceDecl *OI,
bool leafClass,
SmallVectorImpl<const ObjCIvarDecl*> &Ivars) const {
if (const ObjCInterfaceDecl *SuperClass = OI->getSuperClass())
DeepCollectObjCIvars(SuperClass, false, Ivars);
if (!leafClass) {
for (const auto *I : OI->ivars())
Ivars.push_back(I);
} else {
auto *IDecl = const_cast<ObjCInterfaceDecl *>(OI);
for (const ObjCIvarDecl *Iv = IDecl->all_declared_ivar_begin(); Iv;
Iv= Iv->getNextIvar())
Ivars.push_back(Iv);
}
}
/// CollectInheritedProtocols - Collect all protocols in current class and
/// those inherited by it.
void ASTContext::CollectInheritedProtocols(const Decl *CDecl,
llvm::SmallPtrSet<ObjCProtocolDecl*, 8> &Protocols) {
if (const auto *OI = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
// We can use protocol_iterator here instead of
// all_referenced_protocol_iterator since we are walking all categories.
for (auto *Proto : OI->all_referenced_protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
// Categories of this Interface.
for (const auto *Cat : OI->visible_categories())
CollectInheritedProtocols(Cat, Protocols);
if (ObjCInterfaceDecl *SD = OI->getSuperClass())
while (SD) {
CollectInheritedProtocols(SD, Protocols);
SD = SD->getSuperClass();
}
} else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(CDecl)) {
for (auto *Proto : OC->protocols()) {
CollectInheritedProtocols(Proto, Protocols);
}
} else if (const auto *OP = dyn_cast<ObjCProtocolDecl>(CDecl)) {
// Insert the protocol.
if (!Protocols.insert(
const_cast<ObjCProtocolDecl *>(OP->getCanonicalDecl())).second)
return;
for (auto *Proto : OP->protocols())
CollectInheritedProtocols(Proto, Protocols);
}
}
static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(RD->isUnion() && "Must be union type");
CharUnits UnionSize = Context.getTypeSizeInChars(RD->getTypeForDecl());
for (const auto *Field : RD->fields()) {
if (!Context.hasUniqueObjectRepresentations(Field->getType()))
return false;
CharUnits FieldSize = Context.getTypeSizeInChars(Field->getType());
if (FieldSize != UnionSize)
return false;
}
return !RD->field_empty();
}
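// For illustration: 'union U { int a; char b; };' fails this check because
// the char member is smaller than the union, leaving bytes unspecified,
// whereas 'union V { int a; unsigned b; };' passes (assuming int itself has
// no padding bits).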
static bool isStructEmpty(QualType Ty) {
const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl();
if (!RD->field_empty())
return false;
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD))
return ClassDecl->isEmpty();
return true;
}
static llvm::Optional<int64_t>
structHasUniqueObjectRepresentations(const ASTContext &Context,
const RecordDecl *RD) {
assert(!RD->isUnion() && "Must be struct/class type");
const auto &Layout = Context.getASTRecordLayout(RD);
int64_t CurOffsetInBits = 0;
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (ClassDecl->isDynamicClass())
return llvm::None;
SmallVector<std::pair<QualType, int64_t>, 4> Bases;
for (const auto &Base : ClassDecl->bases()) {
// Empty types can be inherited from, and non-empty types can potentially
// have tail padding, so just make sure there isn't an error.
if (!isStructEmpty(Base.getType())) {
llvm::Optional<int64_t> Size = structHasUniqueObjectRepresentations(
Context, Base.getType()->castAs<RecordType>()->getDecl());
if (!Size)
return llvm::None;
Bases.emplace_back(Base.getType(), Size.getValue());
}
}
llvm::sort(Bases, [&](const std::pair<QualType, int64_t> &L,
const std::pair<QualType, int64_t> &R) {
return Layout.getBaseClassOffset(L.first->getAsCXXRecordDecl()) <
Layout.getBaseClassOffset(R.first->getAsCXXRecordDecl());
});
for (const auto &Base : Bases) {
int64_t BaseOffset = Context.toBits(
Layout.getBaseClassOffset(Base.first->getAsCXXRecordDecl()));
int64_t BaseSize = Base.second;
if (BaseOffset != CurOffsetInBits)
return llvm::None;
CurOffsetInBits = BaseOffset + BaseSize;
}
}
for (const auto *Field : RD->fields()) {
if (!Field->getType()->isReferenceType() &&
!Context.hasUniqueObjectRepresentations(Field->getType()))
return llvm::None;
int64_t FieldSizeInBits =
Context.toBits(Context.getTypeSizeInChars(Field->getType()));
if (Field->isBitField()) {
int64_t BitfieldSize = Field->getBitWidthValue(Context);
if (BitfieldSize > FieldSizeInBits)
return llvm::None;
FieldSizeInBits = BitfieldSize;
}
int64_t FieldOffsetInBits = Context.getFieldOffset(Field);
if (FieldOffsetInBits != CurOffsetInBits)
return llvm::None;
CurOffsetInBits = FieldSizeInBits + FieldOffsetInBits;
}
return CurOffsetInBits;
}
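// For illustration, assuming a 32-bit int with 32-bit alignment:
// 'struct A { char c; int i; };' has padding after 'c', so the running
// offset (8 bits) does not match the offset of 'i' (32 bits) and llvm::None
// is returned; 'struct B { int i; int j; };' yields 64, which matches
// sizeof(B) in bits.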
bool ASTContext::hasUniqueObjectRepresentations(QualType Ty) const {
// C++17 [meta.unary.prop]:
// The predicate condition for a template specialization
// has_unique_object_representations<T> shall be
// satisfied if and only if:
// (9.1) - T is trivially copyable, and
// (9.2) - any two objects of type T with the same value have the same
// object representation, where two objects
// of array or non-union class type are considered to have the same value
// if their respective sequences of
// direct subobjects have the same values, and two objects of union type
// are considered to have the same
// value if they have the same active member and the corresponding members
// have the same value.
// The set of scalar types for which this condition holds is
// implementation-defined. [ Note: If a type has padding
// bits, the condition does not hold; otherwise, the condition holds true
// for unsigned integral types. -- end note ]
assert(!Ty.isNull() && "Null QualType sent to unique object rep check");
// Arrays are unique only if their element type is unique.
if (Ty->isArrayType())
return hasUniqueObjectRepresentations(getBaseElementType(Ty));
// (9.1) - T is trivially copyable...
if (!Ty.isTriviallyCopyableType(*this))
return false;
// All integrals and enums are unique.
if (Ty->isIntegralOrEnumerationType())
return true;
// All other pointers are unique.
if (Ty->isPointerType())
return true;
if (Ty->isMemberPointerType()) {
const auto *MPT = Ty->getAs<MemberPointerType>();
return !ABI->getMemberPointerInfo(MPT).HasPadding;
}
if (Ty->isRecordType()) {
const RecordDecl *Record = Ty->castAs<RecordType>()->getDecl();
if (Record->isInvalidDecl())
return false;
if (Record->isUnion())
return unionHasUniqueObjectRepresentations(*this, Record);
Optional<int64_t> StructSize =
structHasUniqueObjectRepresentations(*this, Record);
return StructSize &&
StructSize.getValue() == static_cast<int64_t>(getTypeSize(Ty));
}
// FIXME: More cases to handle here (list by rsmith):
// vectors (careful about, eg, vector of 3 foo)
// _Complex int and friends
// _Atomic T
// Obj-C block pointers
// Obj-C object pointers
// and perhaps OpenCL's various builtin types (pipe, sampler_t, event_t,
// clk_event_t, queue_t, reserve_id_t)
// There are also Obj-C class types and the Obj-C selector type, but I think it
// makes sense for those to return false here.
return false;
}
unsigned ASTContext::CountNonClassIvars(const ObjCInterfaceDecl *OI) const {
unsigned count = 0;
// Count ivars declared in class extension.
for (const auto *Ext : OI->known_extensions())
count += Ext->ivar_size();
// Count ivars defined in this class's implementation. This
// includes synthesized ivars.
if (ObjCImplementationDecl *ImplDecl = OI->getImplementation())
count += ImplDecl->ivar_size();
return count;
}
bool ASTContext::isSentinelNullExpr(const Expr *E) {
if (!E)
return false;
// nullptr_t is always treated as null.
if (E->getType()->isNullPtrType()) return true;
if (E->getType()->isAnyPointerType() &&
E->IgnoreParenCasts()->isNullPointerConstant(*this,
Expr::NPC_ValueDependentIsNull))
return true;
// Unfortunately, __null has type 'int'.
if (isa<GNUNullExpr>(E)) return true;
return false;
}
/// Get the implementation of ObjCInterfaceDecl, or nullptr if none
/// exists.
ObjCImplementationDecl *ASTContext::getObjCImplementation(ObjCInterfaceDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCImplementationDecl>(I->second);
return nullptr;
}
/// Get the implementation of ObjCCategoryDecl, or nullptr if none
/// exists.
ObjCCategoryImplDecl *ASTContext::getObjCImplementation(ObjCCategoryDecl *D) {
llvm::DenseMap<ObjCContainerDecl*, ObjCImplDecl*>::iterator
I = ObjCImpls.find(D);
if (I != ObjCImpls.end())
return cast<ObjCCategoryImplDecl>(I->second);
return nullptr;
}
/// Set the implementation of ObjCInterfaceDecl.
void ASTContext::setObjCImplementation(ObjCInterfaceDecl *IFaceD,
ObjCImplementationDecl *ImplD) {
assert(IFaceD && ImplD && "Passed null params");
ObjCImpls[IFaceD] = ImplD;
}
/// Set the implementation of ObjCCategoryDecl.
void ASTContext::setObjCImplementation(ObjCCategoryDecl *CatD,
ObjCCategoryImplDecl *ImplD) {
assert(CatD && ImplD && "Passed null params");
ObjCImpls[CatD] = ImplD;
}
const ObjCMethodDecl *
ASTContext::getObjCMethodRedeclaration(const ObjCMethodDecl *MD) const {
return ObjCMethodRedecls.lookup(MD);
}
void ASTContext::setObjCMethodRedeclaration(const ObjCMethodDecl *MD,
const ObjCMethodDecl *Redecl) {
assert(!getObjCMethodRedeclaration(MD) && "MD already has a redeclaration");
ObjCMethodRedecls[MD] = Redecl;
}
const ObjCInterfaceDecl *ASTContext::getObjContainingInterface(
const NamedDecl *ND) const {
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(ND->getDeclContext()))
return ID;
if (const auto *CD = dyn_cast<ObjCCategoryDecl>(ND->getDeclContext()))
return CD->getClassInterface();
if (const auto *IMD = dyn_cast<ObjCImplDecl>(ND->getDeclContext()))
return IMD->getClassInterface();
return nullptr;
}
/// Get the copy initialization expression of VarDecl, or nullptr if
/// none exists.
BlockVarCopyInit ASTContext::getBlockVarCopyInit(const VarDecl *VD) const {
assert(VD && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"getBlockVarCopyInits - not __block var");
auto I = BlockVarCopyInits.find(VD);
if (I != BlockVarCopyInits.end())
return I->second;
return {nullptr, false};
}
/// Set the copy initialization expression of a block var decl.
void ASTContext::setBlockVarCopyInit(const VarDecl*VD, Expr *CopyExpr,
bool CanThrow) {
assert(VD && CopyExpr && "Passed null params");
assert(VD->hasAttr<BlocksAttr>() &&
"setBlockVarCopyInits - not __block var");
BlockVarCopyInits[VD].setExprAndFlag(CopyExpr, CanThrow);
}
TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T,
unsigned DataSize) const {
if (!DataSize)
DataSize = TypeLoc::getFullDataSizeForType(T);
else
assert(DataSize == TypeLoc::getFullDataSizeForType(T) &&
"incorrect data size provided to CreateTypeSourceInfo!");
auto *TInfo =
(TypeSourceInfo*)BumpAlloc.Allocate(sizeof(TypeSourceInfo) + DataSize, 8);
new (TInfo) TypeSourceInfo(T);
return TInfo;
}
TypeSourceInfo *ASTContext::getTrivialTypeSourceInfo(QualType T,
SourceLocation L) const {
TypeSourceInfo *DI = CreateTypeSourceInfo(T);
DI->getTypeLoc().initialize(const_cast<ASTContext &>(*this), L);
return DI;
}
const ASTRecordLayout &
ASTContext::getASTObjCInterfaceLayout(const ObjCInterfaceDecl *D) const {
return getObjCLayout(D, nullptr);
}
const ASTRecordLayout &
ASTContext::getASTObjCImplementationLayout(
const ObjCImplementationDecl *D) const {
return getObjCLayout(D->getClassInterface(), D);
}
//===----------------------------------------------------------------------===//
// Type creation/memoization methods
//===----------------------------------------------------------------------===//
QualType
ASTContext::getExtQualType(const Type *baseType, Qualifiers quals) const {
unsigned fastQuals = quals.getFastQualifiers();
quals.removeFastQualifiers();
// Check if we've already instantiated this type.
llvm::FoldingSetNodeID ID;
ExtQuals::Profile(ID, baseType, quals);
void *insertPos = nullptr;
if (ExtQuals *eq = ExtQualNodes.FindNodeOrInsertPos(ID, insertPos)) {
assert(eq->getQualifiers() == quals);
return QualType(eq, fastQuals);
}
// If the base type is not canonical, make the appropriate canonical type.
QualType canon;
if (!baseType->isCanonicalUnqualified()) {
SplitQualType canonSplit = baseType->getCanonicalTypeInternal().split();
canonSplit.Quals.addConsistentQualifiers(quals);
canon = getExtQualType(canonSplit.Ty, canonSplit.Quals);
// Re-find the insert position.
(void) ExtQualNodes.FindNodeOrInsertPos(ID, insertPos);
}
auto *eq = new (*this, TypeAlignment) ExtQuals(baseType, canon, quals);
ExtQualNodes.InsertNode(eq, insertPos);
return QualType(eq, fastQuals);
}
QualType ASTContext::getAddrSpaceQualType(QualType T,
LangAS AddressSpace) const {
QualType CanT = getCanonicalType(T);
if (CanT.getAddressSpace() == AddressSpace)
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an address space specified, it cannot get
// another one.
assert(!Quals.hasAddressSpace() &&
"Type cannot be in multiple addr spaces!");
Quals.addAddressSpace(AddressSpace);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removeAddrSpaceQualType(QualType T) const {
// If the type is not qualified with an address space, just return it
// immediately.
if (!T.hasAddressSpace())
return T;
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode;
while (T.hasAddressSpace()) {
TypeNode = Quals.strip(T);
// If the type no longer has an address space after stripping qualifiers,
// jump out.
if (!QualType(TypeNode, 0).hasAddressSpace())
break;
// There might be sugar in the way. Strip it and try again.
T = T.getSingleStepDesugaredType(*this);
}
Quals.removeAddressSpace();
// Removal of the address space can mean there are no longer any
// non-fast qualifiers, so creating an ExtQualType isn't possible (asserts)
// or required.
if (Quals.hasNonFastQualifiers())
return getExtQualType(TypeNode, Quals);
else
return QualType(TypeNode, Quals.getFastQualifiers());
}
QualType ASTContext::getObjCGCQualType(QualType T,
Qualifiers::GC GCAttr) const {
QualType CanT = getCanonicalType(T);
if (CanT.getObjCGCAttr() == GCAttr)
return T;
if (const auto *ptr = T->getAs<PointerType>()) {
QualType Pointee = ptr->getPointeeType();
if (Pointee->isAnyPointerType()) {
QualType ResultType = getObjCGCQualType(Pointee, GCAttr);
return getPointerType(ResultType);
}
}
// If we are composing extended qualifiers together, merge together
// into one ExtQuals node.
QualifierCollector Quals;
const Type *TypeNode = Quals.strip(T);
// If this type already has an ObjCGC specified, it cannot get
// another one.
assert(!Quals.hasObjCGCAttr() &&
"Type cannot have multiple ObjCGCs!");
Quals.addObjCGCAttr(GCAttr);
return getExtQualType(TypeNode, Quals);
}
QualType ASTContext::removePtrSizeAddrSpace(QualType T) const {
if (const PointerType *Ptr = T->getAs<PointerType>()) {
QualType Pointee = Ptr->getPointeeType();
if (isPtrSizeAddressSpace(Pointee.getAddressSpace())) {
return getPointerType(removeAddrSpaceQualType(Pointee));
}
}
return T;
}
const FunctionType *ASTContext::adjustFunctionType(const FunctionType *T,
FunctionType::ExtInfo Info) {
if (T->getExtInfo() == Info)
return T;
QualType Result;
if (const auto *FNPT = dyn_cast<FunctionNoProtoType>(T)) {
Result = getFunctionNoProtoType(FNPT->getReturnType(), Info);
} else {
const auto *FPT = cast<FunctionProtoType>(T);
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = Info;
Result = getFunctionType(FPT->getReturnType(), FPT->getParamTypes(), EPI);
}
return cast<FunctionType>(Result.getTypePtr());
}
void ASTContext::adjustDeducedFunctionResultType(FunctionDecl *FD,
QualType ResultType) {
FD = FD->getMostRecentDecl();
while (true) {
const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
FD->setType(getFunctionType(ResultType, FPT->getParamTypes(), EPI));
if (FunctionDecl *Next = FD->getPreviousDecl())
FD = Next;
else
break;
}
if (ASTMutationListener *L = getASTMutationListener())
L->DeducedReturnType(FD, ResultType);
}
/// Get a function type and produce the equivalent function type with the
/// specified exception specification. Type sugar that can be present on a
/// declaration of a function with an exception specification is permitted
/// and preserved. Other type sugar (for instance, typedefs) is not.
QualType ASTContext::getFunctionTypeWithExceptionSpec(
QualType Orig, const FunctionProtoType::ExceptionSpecInfo &ESI) {
// Might have some parens.
if (const auto *PT = dyn_cast<ParenType>(Orig))
return getParenType(
getFunctionTypeWithExceptionSpec(PT->getInnerType(), ESI));
// Might be wrapped in a macro qualified type.
if (const auto *MQT = dyn_cast<MacroQualifiedType>(Orig))
return getMacroQualifiedType(
getFunctionTypeWithExceptionSpec(MQT->getUnderlyingType(), ESI),
MQT->getMacroIdentifier());
// Might have a calling-convention attribute.
if (const auto *AT = dyn_cast<AttributedType>(Orig))
return getAttributedType(
AT->getAttrKind(),
getFunctionTypeWithExceptionSpec(AT->getModifiedType(), ESI),
getFunctionTypeWithExceptionSpec(AT->getEquivalentType(), ESI));
// Anything else must be a function type. Rebuild it with the new exception
// specification.
const auto *Proto = Orig->castAs<FunctionProtoType>();
return getFunctionType(
Proto->getReturnType(), Proto->getParamTypes(),
Proto->getExtProtoInfo().withExceptionSpec(ESI));
}
bool ASTContext::hasSameFunctionTypeIgnoringExceptionSpec(QualType T,
QualType U) {
return hasSameType(T, U) ||
(getLangOpts().CPlusPlus17 &&
hasSameType(getFunctionTypeWithExceptionSpec(T, EST_None),
getFunctionTypeWithExceptionSpec(U, EST_None)));
}
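// For illustration: in C++17, 'void () noexcept' and 'void ()' are distinct
// function types, but this predicate treats them as equivalent because both
// map to the same type once their exception specifications are replaced
// with EST_None.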
QualType ASTContext::getFunctionTypeWithoutPtrSizes(QualType T) {
if (const auto *Proto = T->getAs<FunctionProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
SmallVector<QualType, 16> Args(Proto->param_types());
for (unsigned i = 0, n = Args.size(); i != n; ++i)
Args[i] = removePtrSizeAddrSpace(Args[i]);
return getFunctionType(RetTy, Args, Proto->getExtProtoInfo());
}
if (const FunctionNoProtoType *Proto = T->getAs<FunctionNoProtoType>()) {
QualType RetTy = removePtrSizeAddrSpace(Proto->getReturnType());
return getFunctionNoProtoType(RetTy, Proto->getExtInfo());
}
return T;
}
bool ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
return hasSameType(T, U) ||
hasSameType(getFunctionTypeWithoutPtrSizes(T),
getFunctionTypeWithoutPtrSizes(U));
}
void ASTContext::adjustExceptionSpec(
FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
bool AsWritten) {
// Update the type.
QualType Updated =
getFunctionTypeWithExceptionSpec(FD->getType(), ESI);
FD->setType(Updated);
if (!AsWritten)
return;
// Update the type in the type source information too.
if (TypeSourceInfo *TSInfo = FD->getTypeSourceInfo()) {
// If the type and the type-as-written differ, we may need to update
// the type-as-written too.
if (TSInfo->getType() != FD->getType())
Updated = getFunctionTypeWithExceptionSpec(TSInfo->getType(), ESI);
// FIXME: When we get proper type location information for exceptions,
// we'll also have to rebuild the TypeSourceInfo. For now, we just patch
// up the TypeSourceInfo.
assert(TypeLoc::getFullDataSizeForType(Updated) ==
TypeLoc::getFullDataSizeForType(TSInfo->getType()) &&
"TypeLoc size mismatch from updating exception specification");
TSInfo->overrideType(Updated);
}
}
/// getComplexType - Return the uniqued reference to the type for a complex
/// number with the specified element type.
QualType ASTContext::getComplexType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ComplexType::Profile(ID, T);
void *InsertPos = nullptr;
if (ComplexType *CT = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(CT, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getComplexType(getCanonicalType(T));
// Get the new insert position for the node we care about.
ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) ComplexType(T, Canonical);
Types.push_back(New);
ComplexTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getPointerType - Return the uniqued reference to the type for a pointer to
/// the specified type.
QualType ASTContext::getPointerType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
PointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (PointerType *PT = PointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) PointerType(T, Canonical);
Types.push_back(New);
PointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
QualType ASTContext::getAdjustedType(QualType Orig, QualType New) const {
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, Orig, New);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(New);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment)
AdjustedType(Type::Adjusted, Orig, New, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
QualType ASTContext::getDecayedType(QualType T) const {
assert((T->isArrayType() || T->isFunctionType()) && "T does not decay");
QualType Decayed;
// C99 6.7.5.3p7:
// A declaration of a parameter as "array of type" shall be
// adjusted to "qualified pointer to type", where the type
// qualifiers (if any) are those specified within the [ and ] of
// the array type derivation.
if (T->isArrayType())
Decayed = getArrayDecayedType(T);
// C99 6.7.5.3p8:
// A declaration of a parameter as "function returning type"
// shall be adjusted to "pointer to function returning type", as
// in 6.3.2.1.
if (T->isFunctionType())
Decayed = getPointerType(T);
llvm::FoldingSetNodeID ID;
AdjustedType::Profile(ID, T, Decayed);
void *InsertPos = nullptr;
AdjustedType *AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (AT)
return QualType(AT, 0);
QualType Canonical = getCanonicalType(Decayed);
// Get the new insert position for the node we care about.
AT = AdjustedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!AT && "Shouldn't be in the map!");
AT = new (*this, TypeAlignment) DecayedType(T, Decayed, Canonical);
Types.push_back(AT);
AdjustedTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
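// For illustration: a parameter declared as 'const int a[10]' decays to
// 'const int *a', and a parameter of function type 'void (int)' decays to
// 'void (*)(int)'; the DecayedType node records both the original and the
// adjusted type.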
/// getBlockPointerType - Return the uniqued reference to the type for
/// a pointer to the specified block.
QualType ASTContext::getBlockPointerType(QualType T) const {
assert(T->isFunctionType() && "block of function types only");
// Unique pointers, to guarantee there is only one block of a particular
// structure.
llvm::FoldingSetNodeID ID;
BlockPointerType::Profile(ID, T);
void *InsertPos = nullptr;
if (BlockPointerType *PT =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the block pointee type isn't canonical, this won't be a canonical
// type either so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getBlockPointerType(getCanonicalType(T));
// Get the new insert position for the node we care about.
BlockPointerType *NewIP =
BlockPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) BlockPointerType(T, Canonical);
Types.push_back(New);
BlockPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getLValueReferenceType - Return the uniqued reference to the type for an
/// lvalue reference to the specified type.
QualType
ASTContext::getLValueReferenceType(QualType T, bool SpelledAsLValue) const {
assert(getCanonicalType(T) != OverloadTy &&
"Unresolved overloaded function type");
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, SpelledAsLValue);
void *InsertPos = nullptr;
if (LValueReferenceType *RT =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!SpelledAsLValue || InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getLValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
LValueReferenceType *NewIP =
LValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) LValueReferenceType(T, Canonical,
SpelledAsLValue);
Types.push_back(New);
LValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getRValueReferenceType - Return the uniqued reference to the type for an
/// rvalue reference to the specified type.
QualType ASTContext::getRValueReferenceType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
ReferenceType::Profile(ID, T, false);
void *InsertPos = nullptr;
if (RValueReferenceType *RT =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(RT, 0);
const auto *InnerRef = T->getAs<ReferenceType>();
// If the referencee type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (InnerRef || !T.isCanonical()) {
QualType PointeeType = (InnerRef ? InnerRef->getPointeeType() : T);
Canonical = getRValueReferenceType(getCanonicalType(PointeeType));
// Get the new insert position for the node we care about.
RValueReferenceType *NewIP =
RValueReferenceTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) RValueReferenceType(T, Canonical);
Types.push_back(New);
RValueReferenceTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getMemberPointerType - Return the uniqued reference to the type for a
/// member pointer to the specified type, in the specified class.
QualType ASTContext::getMemberPointerType(QualType T, const Type *Cls) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
MemberPointerType::Profile(ID, T, Cls);
void *InsertPos = nullptr;
if (MemberPointerType *PT =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pointee or class type isn't canonical, this won't be a canonical
// type either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical() || !Cls->isCanonicalUnqualified()) {
Canonical = getMemberPointerType(getCanonicalType(T),getCanonicalType(Cls));
// Get the new insert position for the node we care about.
MemberPointerType *NewIP =
MemberPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) MemberPointerType(T, Cls, Canonical);
Types.push_back(New);
MemberPointerTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
/// getConstantArrayType - Return the unique reference to the type for an
/// array of the specified element type.
QualType ASTContext::getConstantArrayType(QualType EltTy,
const llvm::APInt &ArySizeIn,
const Expr *SizeExpr,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals) const {
assert((EltTy->isDependentType() ||
EltTy->isIncompleteType() || EltTy->isConstantSizeType()) &&
"Constant array of VLAs is illegal!");
// We only need the size as part of the type if it's instantiation-dependent.
if (SizeExpr && !SizeExpr->isInstantiationDependent())
SizeExpr = nullptr;
// Convert the array size into a canonical width matching the pointer size for
// the target.
llvm::APInt ArySize(ArySizeIn);
ArySize = ArySize.zextOrTrunc(Target->getMaxPointerWidth());
llvm::FoldingSetNodeID ID;
ConstantArrayType::Profile(ID, *this, EltTy, ArySize, SizeExpr, ASM,
IndexTypeQuals);
void *InsertPos = nullptr;
if (ConstantArrayType *ATP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(ATP, 0);
// If the element type isn't canonical or has qualifiers, or the array bound
// is instantiation-dependent, this won't be a canonical type either, so fill
// in the canonical type field.
QualType Canon;
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers() || SizeExpr) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getConstantArrayType(QualType(canonSplit.Ty, 0), ArySize, nullptr,
ASM, IndexTypeQuals);
Canon = getQualifiedType(Canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
ConstantArrayType *NewIP =
ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
void *Mem = Allocate(
ConstantArrayType::totalSizeToAlloc<const Expr *>(SizeExpr ? 1 : 0),
TypeAlignment);
auto *New = new (Mem)
ConstantArrayType(EltTy, Canon, ArySize, SizeExpr, ASM, IndexTypeQuals);
ConstantArrayTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
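// A minimal usage sketch (assuming an ASTContext &Ctx is in scope): requesting
// a constant array of 'const int' yields a sugared node whose canonical form
// hoists the qualifier onto the array itself, per the split above.
//
//   llvm::APInt Size(Ctx.getTargetInfo().getMaxPointerWidth(), 4);
//   QualType Arr = Ctx.getConstantArrayType(Ctx.IntTy.withConst(), Size,
//                                           /*SizeExpr=*/nullptr,
//                                           ArrayType::Normal,
//                                           /*IndexTypeQuals=*/0);
//   // Arr's canonical type is 'const' applied to 'int [4]'.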
/// getVariableArrayDecayedType - Turns the given type, which may be
/// variably-modified, into the corresponding type with all the known
/// sizes replaced with [*].
QualType ASTContext::getVariableArrayDecayedType(QualType type) const {
// By far the most common case.
if (!type->isVariablyModifiedType()) return type;
QualType result;
SplitQualType split = type.getSplitDesugaredType();
const Type *ty = split.Ty;
switch (ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("didn't desugar past all non-canonical types?");
// These types should never be variably-modified.
case Type::Builtin:
case Type::Complex:
case Type::Vector:
case Type::DependentVector:
case Type::ExtVector:
case Type::DependentSizedExtVector:
case Type::ConstantMatrix:
case Type::DependentSizedMatrix:
case Type::DependentAddressSpace:
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::Record:
case Type::Enum:
case Type::UnresolvedUsing:
case Type::TypeOfExpr:
case Type::TypeOf:
case Type::Decltype:
case Type::UnaryTransform:
case Type::DependentName:
case Type::InjectedClassName:
case Type::TemplateSpecialization:
case Type::DependentTemplateSpecialization:
case Type::TemplateTypeParm:
case Type::SubstTemplateTypeParmPack:
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::PackExpansion:
case Type::ExtInt:
case Type::DependentExtInt:
llvm_unreachable("type should never be variably-modified");
// These types can be variably-modified but should never need to
// further decay.
case Type::FunctionNoProto:
case Type::FunctionProto:
case Type::BlockPointer:
case Type::MemberPointer:
case Type::Pipe:
return type;
// These types can be variably-modified. All these modifications
// preserve structure except as noted by comments.
// TODO: if we ever care about optimizing VLAs, there are no-op
// optimizations available here.
case Type::Pointer:
result = getPointerType(getVariableArrayDecayedType(
cast<PointerType>(ty)->getPointeeType()));
break;
case Type::LValueReference: {
const auto *lv = cast<LValueReferenceType>(ty);
result = getLValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()),
lv->isSpelledAsLValue());
break;
}
case Type::RValueReference: {
const auto *lv = cast<RValueReferenceType>(ty);
result = getRValueReferenceType(
getVariableArrayDecayedType(lv->getPointeeType()));
break;
}
case Type::Atomic: {
const auto *at = cast<AtomicType>(ty);
result = getAtomicType(getVariableArrayDecayedType(at->getValueType()));
break;
}
case Type::ConstantArray: {
const auto *cat = cast<ConstantArrayType>(ty);
result = getConstantArrayType(
getVariableArrayDecayedType(cat->getElementType()),
cat->getSize(),
cat->getSizeExpr(),
cat->getSizeModifier(),
cat->getIndexTypeCVRQualifiers());
break;
}
case Type::DependentSizedArray: {
const auto *dat = cast<DependentSizedArrayType>(ty);
result = getDependentSizedArrayType(
getVariableArrayDecayedType(dat->getElementType()),
dat->getSizeExpr(),
dat->getSizeModifier(),
dat->getIndexTypeCVRQualifiers(),
dat->getBracketsRange());
break;
}
// Turn incomplete types into [*] types.
case Type::IncompleteArray: {
const auto *iat = cast<IncompleteArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(iat->getElementType()),
/*size*/ nullptr,
ArrayType::Normal,
iat->getIndexTypeCVRQualifiers(),
SourceRange());
break;
}
// Turn VLA types into [*] types.
case Type::VariableArray: {
const auto *vat = cast<VariableArrayType>(ty);
result = getVariableArrayType(
getVariableArrayDecayedType(vat->getElementType()),
/*size*/ nullptr,
ArrayType::Star,
vat->getIndexTypeCVRQualifiers(),
vat->getBracketsRange());
break;
}
}
// Apply the top-level qualifiers from the original.
return getQualifiedType(result, split.Quals);
}
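// Sketch of the decay above: given a variably modified type such as
// 'int (*)[n][4]' (where 'n' is a runtime bound), the result replaces the VLA
// bound with a star, yielding 'int (*)[*][4]'; the pointer and the inner
// constant-size array keep their structure, and only the known size is erased.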
/// getVariableArrayType - Returns a non-unique reference to the type for a
/// variable array of the specified element type.
QualType ASTContext::getVariableArrayType(QualType EltTy,
Expr *NumElts,
ArrayType::ArraySizeModifier ASM,
unsigned IndexTypeQuals,
SourceRange Brackets) const {
// Since we don't unique expressions, it isn't possible to unique VLA's
// that have an expression provided for their size.
QualType Canon;
// Be sure to pull qualifiers off the element type.
if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(EltTy).split();
Canon = getVariableArrayType(QualType(canonSplit.Ty, 0), NumElts, ASM,
IndexTypeQuals, Brackets);
Canon = getQualifiedType(Canon, canonSplit.Quals);
}
auto *New = new (*this, TypeAlignment)
VariableArrayType(EltTy, Canon, NumElts, ASM, IndexTypeQuals, Brackets);
VariableArrayTypes.push_back(New);
Types.push_back(New);
return QualType(New, 0);
}
/// getDependentSizedArrayType - Returns a non-unique reference to
/// the type for a dependently-sized array of the specified element
/// type.
QualType ASTContext::getDependentSizedArrayType(QualType elementType,
Expr *numElements,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals,
SourceRange brackets) const {
assert((!numElements || numElements->isTypeDependent() ||
numElements->isValueDependent()) &&
"Size must be type- or value-dependent!");
// Dependently-sized array types that do not have a specified number
// of elements will have their sizes deduced from a dependent
// initializer. We do no canonicalization here at all, which is okay
// because they can't be used in most locations.
if (!numElements) {
auto *newType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, QualType(),
numElements, ASM, elementTypeQuals,
brackets);
Types.push_back(newType);
return QualType(newType, 0);
}
// Otherwise, we actually build a new type every time, but we
// also build a canonical type.
SplitQualType canonElementType = getCanonicalType(elementType).split();
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentSizedArrayType::Profile(ID, *this,
QualType(canonElementType.Ty, 0),
ASM, elementTypeQuals, numElements);
// Look for an existing type with these properties.
DependentSizedArrayType *canonTy =
DependentSizedArrayTypes.FindNodeOrInsertPos(ID, insertPos);
// If we don't have one, build one.
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentSizedArrayType(*this, QualType(canonElementType.Ty, 0),
QualType(), numElements, ASM, elementTypeQuals,
brackets);
DependentSizedArrayTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
// Apply qualifiers from the element type to the array.
QualType canon = getQualifiedType(QualType(canonTy,0),
canonElementType.Quals);
// If we didn't need extra canonicalization for the element type or the size
// expression, then just use that as our result.
if (QualType(canonElementType.Ty, 0) == elementType &&
canonTy->getSizeExpr() == numElements)
return canon;
// Otherwise, we need to build a type which follows the spelling
// of the element type.
auto *sugaredType
= new (*this, TypeAlignment)
DependentSizedArrayType(*this, elementType, canon, numElements,
ASM, elementTypeQuals, brackets);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
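// Sketch: the first request for 'T[N]' with a dependent 'N' builds and uniques
// the canonical node (canonical, unqualified element type); a later request
// spelled through sugar, e.g. a typedef for 'T', reuses that canonical node as
// its canonical type and gets its own non-uniqued node that preserves the
// written spelling.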
QualType ASTContext::getIncompleteArrayType(QualType elementType,
ArrayType::ArraySizeModifier ASM,
unsigned elementTypeQuals) const {
llvm::FoldingSetNodeID ID;
IncompleteArrayType::Profile(ID, elementType, ASM, elementTypeQuals);
void *insertPos = nullptr;
if (IncompleteArrayType *iat =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos))
return QualType(iat, 0);
// If the element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field. We also have to pull
// qualifiers off the element type.
QualType canon;
if (!elementType.isCanonical() || elementType.hasLocalQualifiers()) {
SplitQualType canonSplit = getCanonicalType(elementType).split();
canon = getIncompleteArrayType(QualType(canonSplit.Ty, 0),
ASM, elementTypeQuals);
canon = getQualifiedType(canon, canonSplit.Quals);
// Get the new insert position for the node we care about.
IncompleteArrayType *existing =
IncompleteArrayTypes.FindNodeOrInsertPos(ID, insertPos);
assert(!existing && "Shouldn't be in the map!"); (void) existing;
}
auto *newType = new (*this, TypeAlignment)
IncompleteArrayType(elementType, canon, ASM, elementTypeQuals);
IncompleteArrayTypes.InsertNode(newType, insertPos);
Types.push_back(newType);
return QualType(newType, 0);
}
ASTContext::BuiltinVectorTypeInfo
ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
#define SVE_INT_ELTTY(BITS, ELTS, SIGNED, NUMVECTORS) \
{getIntTypeForBitwidth(BITS, SIGNED), llvm::ElementCount::getScalable(ELTS), \
NUMVECTORS};
#define SVE_ELTTY(ELTTY, ELTS, NUMVECTORS) \
{ELTTY, llvm::ElementCount::getScalable(ELTS), NUMVECTORS};
switch (Ty->getKind()) {
default:
llvm_unreachable("Unsupported builtin vector type");
case BuiltinType::SveInt8:
return SVE_INT_ELTTY(8, 16, true, 1);
case BuiltinType::SveUint8:
return SVE_INT_ELTTY(8, 16, false, 1);
case BuiltinType::SveInt8x2:
return SVE_INT_ELTTY(8, 16, true, 2);
case BuiltinType::SveUint8x2:
return SVE_INT_ELTTY(8, 16, false, 2);
case BuiltinType::SveInt8x3:
return SVE_INT_ELTTY(8, 16, true, 3);
case BuiltinType::SveUint8x3:
return SVE_INT_ELTTY(8, 16, false, 3);
case BuiltinType::SveInt8x4:
return SVE_INT_ELTTY(8, 16, true, 4);
case BuiltinType::SveUint8x4:
return SVE_INT_ELTTY(8, 16, false, 4);
case BuiltinType::SveInt16:
return SVE_INT_ELTTY(16, 8, true, 1);
case BuiltinType::SveUint16:
return SVE_INT_ELTTY(16, 8, false, 1);
case BuiltinType::SveInt16x2:
return SVE_INT_ELTTY(16, 8, true, 2);
case BuiltinType::SveUint16x2:
return SVE_INT_ELTTY(16, 8, false, 2);
case BuiltinType::SveInt16x3:
return SVE_INT_ELTTY(16, 8, true, 3);
case BuiltinType::SveUint16x3:
return SVE_INT_ELTTY(16, 8, false, 3);
case BuiltinType::SveInt16x4:
return SVE_INT_ELTTY(16, 8, true, 4);
case BuiltinType::SveUint16x4:
return SVE_INT_ELTTY(16, 8, false, 4);
case BuiltinType::SveInt32:
return SVE_INT_ELTTY(32, 4, true, 1);
case BuiltinType::SveUint32:
return SVE_INT_ELTTY(32, 4, false, 1);
case BuiltinType::SveInt32x2:
return SVE_INT_ELTTY(32, 4, true, 2);
case BuiltinType::SveUint32x2:
return SVE_INT_ELTTY(32, 4, false, 2);
case BuiltinType::SveInt32x3:
return SVE_INT_ELTTY(32, 4, true, 3);
case BuiltinType::SveUint32x3:
return SVE_INT_ELTTY(32, 4, false, 3);
case BuiltinType::SveInt32x4:
return SVE_INT_ELTTY(32, 4, true, 4);
case BuiltinType::SveUint32x4:
return SVE_INT_ELTTY(32, 4, false, 4);
case BuiltinType::SveInt64:
return SVE_INT_ELTTY(64, 2, true, 1);
case BuiltinType::SveUint64:
return SVE_INT_ELTTY(64, 2, false, 1);
case BuiltinType::SveInt64x2:
return SVE_INT_ELTTY(64, 2, true, 2);
case BuiltinType::SveUint64x2:
return SVE_INT_ELTTY(64, 2, false, 2);
case BuiltinType::SveInt64x3:
return SVE_INT_ELTTY(64, 2, true, 3);
case BuiltinType::SveUint64x3:
return SVE_INT_ELTTY(64, 2, false, 3);
case BuiltinType::SveInt64x4:
return SVE_INT_ELTTY(64, 2, true, 4);
case BuiltinType::SveUint64x4:
return SVE_INT_ELTTY(64, 2, false, 4);
case BuiltinType::SveBool:
return SVE_ELTTY(BoolTy, 16, 1);
case BuiltinType::SveFloat16:
return SVE_ELTTY(HalfTy, 8, 1);
case BuiltinType::SveFloat16x2:
return SVE_ELTTY(HalfTy, 8, 2);
case BuiltinType::SveFloat16x3:
return SVE_ELTTY(HalfTy, 8, 3);
case BuiltinType::SveFloat16x4:
return SVE_ELTTY(HalfTy, 8, 4);
case BuiltinType::SveFloat32:
return SVE_ELTTY(FloatTy, 4, 1);
case BuiltinType::SveFloat32x2:
return SVE_ELTTY(FloatTy, 4, 2);
case BuiltinType::SveFloat32x3:
return SVE_ELTTY(FloatTy, 4, 3);
case BuiltinType::SveFloat32x4:
return SVE_ELTTY(FloatTy, 4, 4);
case BuiltinType::SveFloat64:
return SVE_ELTTY(DoubleTy, 2, 1);
case BuiltinType::SveFloat64x2:
return SVE_ELTTY(DoubleTy, 2, 2);
case BuiltinType::SveFloat64x3:
return SVE_ELTTY(DoubleTy, 2, 3);
case BuiltinType::SveFloat64x4:
return SVE_ELTTY(DoubleTy, 2, 4);
case BuiltinType::SveBFloat16:
return SVE_ELTTY(BFloat16Ty, 8, 1);
case BuiltinType::SveBFloat16x2:
return SVE_ELTTY(BFloat16Ty, 8, 2);
case BuiltinType::SveBFloat16x3:
return SVE_ELTTY(BFloat16Ty, 8, 3);
case BuiltinType::SveBFloat16x4:
return SVE_ELTTY(BFloat16Ty, 8, 4);
#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, \
IsSigned) \
case BuiltinType::Id: \
return {getIntTypeForBitwidth(ElBits, IsSigned), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_VECTOR_TYPE_FLOAT(Name, Id, SingletonId, NumEls, ElBits, NF) \
case BuiltinType::Id: \
return {ElBits == 16 ? Float16Ty : (ElBits == 32 ? FloatTy : DoubleTy), \
llvm::ElementCount::getScalable(NumEls), NF};
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
case BuiltinType::Id: \
return {BoolTy, llvm::ElementCount::getScalable(NumEls), 1};
#include "clang/Basic/RISCVVTypes.def"
}
}
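// For example, under the table above SveInt32x2 reports an element type of a
// signed 32-bit integer, an element count of 'scalable x 4', and NumVectors of
// 2: a tuple of two scalable vectors with four 32-bit lanes per 128-bit
// granule.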
/// getScalableVectorType - Return the unique reference to a scalable vector
/// type of the specified element type and size. VectorType must be a built-in
/// type.
QualType ASTContext::getScalableVectorType(QualType EltTy,
unsigned NumElts) const {
if (Target->hasAArch64SVETypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId, NumEls, ElBits, \
IsSigned, IsFP, IsBF) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() && \
IsFP && !IsBF) || \
(EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() && \
IsBF && !IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) { \
return SingletonId; \
}
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/AArch64SVEACLETypes.def"
} else if (Target->hasRISCVVTypes()) {
uint64_t EltTySize = getTypeSize(EltTy);
#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, \
IsFP) \
if (!EltTy->isBooleanType() && \
((EltTy->hasIntegerRepresentation() && \
EltTy->hasSignedIntegerRepresentation() == IsSigned) || \
(EltTy->hasFloatingRepresentation() && IsFP)) && \
EltTySize == ElBits && NumElts == NumEls) \
return SingletonId;
#define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \
if (EltTy->isBooleanType() && NumElts == NumEls) \
return SingletonId;
#include "clang/Basic/RISCVVTypes.def"
}
return QualType();
}
/// getVectorType - Return the unique reference to a vector type of
/// the specified element type and size. VectorType must be a built-in type.
QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts,
VectorType::VectorKind VecKind) const {
assert(vecType->isBuiltinType());
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::Vector, VecKind);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getVectorType(getCanonicalType(vecType), NumElts, VecKind);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
VectorType(vecType, NumElts, Canonical, VecKind);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentVectorType(QualType VecType, Expr *SizeExpr,
SourceLocation AttrLoc,
VectorType::VectorKind VecKind) const {
llvm::FoldingSetNodeID ID;
DependentVectorType::Profile(ID, *this, getCanonicalType(VecType), SizeExpr,
VecKind);
void *InsertPos = nullptr;
DependentVectorType *Canon =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentVectorType *New;
if (Canon) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(Canon, 0), SizeExpr, AttrLoc, VecKind);
} else {
QualType CanonVecTy = getCanonicalType(VecType);
if (CanonVecTy == VecType) {
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, QualType(), SizeExpr, AttrLoc, VecKind);
DependentVectorType *CanonCheck =
DependentVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck &&
"Dependent-sized vector_size canonical type broken");
(void)CanonCheck;
DependentVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonTy = getDependentVectorType(CanonVecTy, SizeExpr,
SourceLocation(), VecKind);
New = new (*this, TypeAlignment) DependentVectorType(
*this, VecType, CanonTy, SizeExpr, AttrLoc, VecKind);
}
}
Types.push_back(New);
return QualType(New, 0);
}
/// getExtVectorType - Return the unique reference to an extended vector type of
/// the specified element type and size. VectorType must be a built-in type.
QualType
ASTContext::getExtVectorType(QualType vecType, unsigned NumElts) const {
assert(vecType->isBuiltinType() || vecType->isDependentType());
// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
VectorType::Profile(ID, vecType, NumElts, Type::ExtVector,
VectorType::GenericVector);
void *InsertPos = nullptr;
if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(VTP, 0);
// If the element type isn't canonical, this won't be a canonical type either,
// so fill in the canonical type field.
QualType Canonical;
if (!vecType.isCanonical()) {
Canonical = getExtVectorType(getCanonicalType(vecType), NumElts);
// Get the new insert position for the node we care about.
VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ExtVectorType(vecType, NumElts, Canonical);
VectorTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType
ASTContext::getDependentSizedExtVectorType(QualType vecType,
Expr *SizeExpr,
SourceLocation AttrLoc) const {
llvm::FoldingSetNodeID ID;
DependentSizedExtVectorType::Profile(ID, *this, getCanonicalType(vecType),
SizeExpr);
void *InsertPos = nullptr;
DependentSizedExtVectorType *Canon
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
DependentSizedExtVectorType *New;
if (Canon) {
// We already have a canonical version of this array type; use it as
// the canonical type for a newly-built type.
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(Canon, 0),
SizeExpr, AttrLoc);
} else {
QualType CanonVecTy = getCanonicalType(vecType);
if (CanonVecTy == vecType) {
New = new (*this, TypeAlignment)
DependentSizedExtVectorType(*this, vecType, QualType(), SizeExpr,
AttrLoc);
DependentSizedExtVectorType *CanonCheck
= DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized ext_vector canonical type broken");
(void)CanonCheck;
DependentSizedExtVectorTypes.InsertNode(New, InsertPos);
} else {
QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr,
SourceLocation());
New = new (*this, TypeAlignment) DependentSizedExtVectorType(
*this, vecType, CanonExtTy, SizeExpr, AttrLoc);
}
}
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows,
unsigned NumColumns) const {
llvm::FoldingSetNodeID ID;
ConstantMatrixType::Profile(ID, ElementTy, NumRows, NumColumns,
Type::ConstantMatrix);
assert(MatrixType::isValidElementType(ElementTy) &&
"need a valid element type");
assert(ConstantMatrixType::isDimensionValid(NumRows) &&
ConstantMatrixType::isDimensionValid(NumColumns) &&
"need valid matrix dimensions");
void *InsertPos = nullptr;
if (ConstantMatrixType *MTP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(MTP, 0);
QualType Canonical;
if (!ElementTy.isCanonical()) {
Canonical =
getConstantMatrixType(getCanonicalType(ElementTy), NumRows, NumColumns);
ConstantMatrixType *NewIP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Matrix type shouldn't already exist in the map");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment)
ConstantMatrixType(ElementTy, NumRows, NumColumns, Canonical);
MatrixTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentSizedMatrixType(QualType ElementTy,
Expr *RowExpr,
Expr *ColumnExpr,
SourceLocation AttrLoc) const {
QualType CanonElementTy = getCanonicalType(ElementTy);
llvm::FoldingSetNodeID ID;
DependentSizedMatrixType::Profile(ID, *this, CanonElementTy, RowExpr,
ColumnExpr);
void *InsertPos = nullptr;
DependentSizedMatrixType *Canon =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
Canon = new (*this, TypeAlignment) DependentSizedMatrixType(
*this, CanonElementTy, QualType(), RowExpr, ColumnExpr, AttrLoc);
#ifndef NDEBUG
DependentSizedMatrixType *CanonCheck =
DependentSizedMatrixTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CanonCheck && "Dependent-sized matrix canonical type broken");
#endif
DependentSizedMatrixTypes.InsertNode(Canon, InsertPos);
Types.push_back(Canon);
}
// Already have a canonical version of the matrix type
//
// If it exactly matches the requested type, use it directly.
if (Canon->getElementType() == ElementTy && Canon->getRowExpr() == RowExpr &&
Canon->getColumnExpr() == ColumnExpr)
return QualType(Canon, 0);
// Use Canon as the canonical type for the newly-built type.
DependentSizedMatrixType *New = new (*this, TypeAlignment)
DependentSizedMatrixType(*this, ElementTy, QualType(Canon, 0), RowExpr,
ColumnExpr, AttrLoc);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentAddressSpaceType(QualType PointeeType,
Expr *AddrSpaceExpr,
SourceLocation AttrLoc) const {
assert(AddrSpaceExpr->isInstantiationDependent());
QualType canonPointeeType = getCanonicalType(PointeeType);
void *insertPos = nullptr;
llvm::FoldingSetNodeID ID;
DependentAddressSpaceType::Profile(ID, *this, canonPointeeType,
AddrSpaceExpr);
DependentAddressSpaceType *canonTy =
DependentAddressSpaceTypes.FindNodeOrInsertPos(ID, insertPos);
if (!canonTy) {
canonTy = new (*this, TypeAlignment)
DependentAddressSpaceType(*this, canonPointeeType,
QualType(), AddrSpaceExpr, AttrLoc);
DependentAddressSpaceTypes.InsertNode(canonTy, insertPos);
Types.push_back(canonTy);
}
if (canonPointeeType == PointeeType &&
canonTy->getAddrSpaceExpr() == AddrSpaceExpr)
return QualType(canonTy, 0);
auto *sugaredType
= new (*this, TypeAlignment)
DependentAddressSpaceType(*this, PointeeType, QualType(canonTy, 0),
AddrSpaceExpr, AttrLoc);
Types.push_back(sugaredType);
return QualType(sugaredType, 0);
}
/// Determine whether \p T is canonical as the result type of a function.
static bool isCanonicalResultType(QualType T) {
return T.isCanonical() &&
(T.getObjCLifetime() == Qualifiers::OCL_None ||
T.getObjCLifetime() == Qualifiers::OCL_ExplicitNone);
}
/// getFunctionNoProtoType - Return a K&R style C function type like 'int()'.
QualType
ASTContext::getFunctionNoProtoType(QualType ResultTy,
const FunctionType::ExtInfo &Info) const {
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionNoProtoType::Profile(ID, ResultTy, Info);
void *InsertPos = nullptr;
if (FunctionNoProtoType *FT =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(FT, 0);
QualType Canonical;
if (!isCanonicalResultType(ResultTy)) {
Canonical =
getFunctionNoProtoType(getCanonicalFunctionResultType(ResultTy), Info);
// Get the new insert position for the node we care about.
FunctionNoProtoType *NewIP =
FunctionNoProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment)
FunctionNoProtoType(ResultTy, Canonical, Info);
Types.push_back(New);
FunctionNoProtoTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
CanQualType
ASTContext::getCanonicalFunctionResultType(QualType ResultType) const {
CanQualType CanResultType = getCanonicalType(ResultType);
// Canonical result types do not have ARC lifetime qualifiers.
if (CanResultType.getQualifiers().hasObjCLifetime()) {
Qualifiers Qs = CanResultType.getQualifiers();
Qs.removeObjCLifetime();
return CanQualType::CreateUnsafe(
getQualifiedType(CanResultType.getUnqualifiedType(), Qs));
}
return CanResultType;
}
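// Sketch: under ARC, a function or block whose result is written '__strong id'
// canonicalizes its result to plain 'id' here, so spellings that differ only
// in the result's ObjC lifetime qualifier map to one canonical function type.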
static bool isCanonicalExceptionSpecification(
const FunctionProtoType::ExceptionSpecInfo &ESI, bool NoexceptInType) {
if (ESI.Type == EST_None)
return true;
if (!NoexceptInType)
return false;
// C++17 onwards: exception specification is part of the type, as a simple
// boolean "can this function type throw".
if (ESI.Type == EST_BasicNoexcept)
return true;
// A noexcept(expr) specification is (possibly) canonical if expr is
// value-dependent.
if (ESI.Type == EST_DependentNoexcept)
return true;
// A dynamic exception specification is canonical if it only contains pack
// expansions (so we can't tell whether it's non-throwing) and all its
// contained types are canonical.
if (ESI.Type == EST_Dynamic) {
bool AnyPackExpansions = false;
for (QualType ET : ESI.Exceptions) {
if (!ET.isCanonical())
return false;
if (ET->getAs<PackExpansionType>())
AnyPackExpansions = true;
}
return AnyPackExpansions;
}
return false;
}
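// Sketch of the C++17 behaviour encoded here and in getFunctionTypeInternal
// below: 'noexcept', 'noexcept(true)', 'throw()' and the NoThrow extension all
// canonicalize to the basic noexcept form; 'noexcept(false)' and 'throw(A)'
// canonicalize to a potentially-throwing type; a dependent 'noexcept(expr)' is
// already canonical; and 'throw(Ts...)' stays a dynamic specification only
// while the pack keeps the thrown set unknown.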
QualType ASTContext::getFunctionTypeInternal(
QualType ResultTy, ArrayRef<QualType> ArgArray,
const FunctionProtoType::ExtProtoInfo &EPI, bool OnlyWantCanonical) const {
size_t NumArgs = ArgArray.size();
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
FunctionProtoType::Profile(ID, ResultTy, ArgArray.begin(), NumArgs, EPI,
*this, true);
QualType Canonical;
bool Unique = false;
void *InsertPos = nullptr;
if (FunctionProtoType *FPT =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos)) {
QualType Existing = QualType(FPT, 0);
// If we find a pre-existing equivalent FunctionProtoType, we can just reuse
// it so long as our exception specification doesn't contain a dependent
// noexcept expression, or we're just looking for a canonical type.
// Otherwise, we're going to need to create a type
// sugar node to hold the concrete expression.
if (OnlyWantCanonical || !isComputedNoexcept(EPI.ExceptionSpec.Type) ||
EPI.ExceptionSpec.NoexceptExpr == FPT->getNoexceptExpr())
return Existing;
// We need a new type sugar node for this one, to hold the new noexcept
// expression. We do no canonicalization here, but that's OK since we don't
// expect to see the same noexcept expression much more than once.
Canonical = getCanonicalType(Existing);
Unique = true;
}
bool NoexceptInType = getLangOpts().CPlusPlus17;
bool IsCanonicalExceptionSpec =
isCanonicalExceptionSpecification(EPI.ExceptionSpec, NoexceptInType);
// Determine whether the type being created is already canonical or not.
bool isCanonical = !Unique && IsCanonicalExceptionSpec &&
isCanonicalResultType(ResultTy) && !EPI.HasTrailingReturn;
for (unsigned i = 0; i != NumArgs && isCanonical; ++i)
if (!ArgArray[i].isCanonicalAsParam())
isCanonical = false;
if (OnlyWantCanonical)
assert(isCanonical &&
"given non-canonical parameters constructing canonical type");
// If this type isn't canonical, get the canonical version of it if we don't
// already have it. The exception spec is only partially part of the
// canonical type, and only in C++17 onwards.
if (!isCanonical && Canonical.isNull()) {
SmallVector<QualType, 16> CanonicalArgs;
CanonicalArgs.reserve(NumArgs);
for (unsigned i = 0; i != NumArgs; ++i)
CanonicalArgs.push_back(getCanonicalParamType(ArgArray[i]));
llvm::SmallVector<QualType, 8> ExceptionTypeStorage;
FunctionProtoType::ExtProtoInfo CanonicalEPI = EPI;
CanonicalEPI.HasTrailingReturn = false;
if (IsCanonicalExceptionSpec) {
// Exception spec is already OK.
} else if (NoexceptInType) {
switch (EPI.ExceptionSpec.Type) {
case EST_Unparsed: case EST_Unevaluated: case EST_Uninstantiated:
// We don't know yet. It shouldn't matter what we pick here; no-one
// should ever look at this.
LLVM_FALLTHROUGH;
case EST_None: case EST_MSAny: case EST_NoexceptFalse:
CanonicalEPI.ExceptionSpec.Type = EST_None;
break;
// A dynamic exception specification is almost always "not noexcept",
// with the exception that a pack expansion might expand to no types.
case EST_Dynamic: {
bool AnyPacks = false;
for (QualType ET : EPI.ExceptionSpec.Exceptions) {
if (ET->getAs<PackExpansionType>())
AnyPacks = true;
ExceptionTypeStorage.push_back(getCanonicalType(ET));
}
if (!AnyPacks)
CanonicalEPI.ExceptionSpec.Type = EST_None;
else {
CanonicalEPI.ExceptionSpec.Type = EST_Dynamic;
CanonicalEPI.ExceptionSpec.Exceptions = ExceptionTypeStorage;
}
break;
}
case EST_DynamicNone:
case EST_BasicNoexcept:
case EST_NoexceptTrue:
case EST_NoThrow:
CanonicalEPI.ExceptionSpec.Type = EST_BasicNoexcept;
break;
case EST_DependentNoexcept:
llvm_unreachable("dependent noexcept is already canonical");
}
} else {
CanonicalEPI.ExceptionSpec = FunctionProtoType::ExceptionSpecInfo();
}
// Adjust the canonical function result type.
CanQualType CanResultTy = getCanonicalFunctionResultType(ResultTy);
Canonical =
getFunctionTypeInternal(CanResultTy, CanonicalArgs, CanonicalEPI, true);
// Get the new insert position for the node we care about.
FunctionProtoType *NewIP =
FunctionProtoTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
// Compute the needed size to hold this FunctionProtoType and the
// various trailing objects.
auto ESH = FunctionProtoType::getExceptionSpecSize(
EPI.ExceptionSpec.Type, EPI.ExceptionSpec.Exceptions.size());
size_t Size = FunctionProtoType::totalSizeToAlloc<
QualType, SourceLocation, FunctionType::FunctionTypeExtraBitfields,
FunctionType::ExceptionType, Expr *, FunctionDecl *,
FunctionProtoType::ExtParameterInfo, Qualifiers>(
NumArgs, EPI.Variadic,
FunctionProtoType::hasExtraBitfields(EPI.ExceptionSpec.Type),
ESH.NumExceptionType, ESH.NumExprPtr, ESH.NumFunctionDeclPtr,
EPI.ExtParameterInfos ? NumArgs : 0,
EPI.TypeQuals.hasNonFastQualifiers() ? 1 : 0);
auto *FTP = (FunctionProtoType *)Allocate(Size, TypeAlignment);
FunctionProtoType::ExtProtoInfo newEPI = EPI;
new (FTP) FunctionProtoType(ResultTy, ArgArray, Canonical, newEPI);
Types.push_back(FTP);
if (!Unique)
FunctionProtoTypes.InsertNode(FTP, InsertPos);
return QualType(FTP, 0);
}
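// Sketch: two prototypes that differ only in the concrete expression of a
// dependent 'noexcept(expr)' share one canonical FunctionProtoType, but each
// keeps its own sugar node, because the early lookup above only reuses an
// existing node when the noexcept expression matches (or when the caller asked
// for the canonical type only).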
QualType ASTContext::getPipeType(QualType T, bool ReadOnly) const {
llvm::FoldingSetNodeID ID;
PipeType::Profile(ID, T, ReadOnly);
void *InsertPos = nullptr;
if (PipeType *PT = PipeTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(PT, 0);
// If the pipe element type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getPipeType(getCanonicalType(T), ReadOnly);
// Get the new insert position for the node we care about.
PipeType *NewIP = PipeTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!");
(void)NewIP;
}
auto *New = new (*this, TypeAlignment) PipeType(T, Canonical, ReadOnly);
Types.push_back(New);
PipeTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
QualType ASTContext::adjustStringLiteralBaseType(QualType Ty) const {
// OpenCL v1.1 s6.5.3: a string literal is in the constant address space.
return LangOpts.OpenCL ? getAddrSpaceQualType(Ty, LangAS::opencl_constant)
: Ty;
}
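// Sketch: for OpenCL, the element type of a string literal is placed in the
// '__constant' address space, so "abc" ends up typed roughly as an array of
// '__constant char'; for other languages the type passes through unchanged.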
QualType ASTContext::getReadPipeType(QualType T) const {
return getPipeType(T, true);
}
QualType ASTContext::getWritePipeType(QualType T) const {
return getPipeType(T, false);
}
QualType ASTContext::getExtIntType(bool IsUnsigned, unsigned NumBits) const {
llvm::FoldingSetNodeID ID;
ExtIntType::Profile(ID, IsUnsigned, NumBits);
void *InsertPos = nullptr;
if (ExtIntType *EIT = ExtIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(EIT, 0);
auto *New = new (*this, TypeAlignment) ExtIntType(IsUnsigned, NumBits);
ExtIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
QualType ASTContext::getDependentExtIntType(bool IsUnsigned,
Expr *NumBitsExpr) const {
assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent");
llvm::FoldingSetNodeID ID;
DependentExtIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr);
void *InsertPos = nullptr;
if (DependentExtIntType *Existing =
DependentExtIntTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(Existing, 0);
auto *New = new (*this, TypeAlignment)
DependentExtIntType(*this, IsUnsigned, NumBitsExpr);
DependentExtIntTypes.InsertNode(New, InsertPos);
Types.push_back(New);
return QualType(New, 0);
}
#ifndef NDEBUG
static bool NeedsInjectedClassNameType(const RecordDecl *D) {
if (!isa<CXXRecordDecl>(D)) return false;
const auto *RD = cast<CXXRecordDecl>(D);
if (isa<ClassTemplatePartialSpecializationDecl>(RD))
return true;
if (RD->getDescribedClassTemplate() &&
!isa<ClassTemplateSpecializationDecl>(RD))
return true;
return false;
}
#endif
/// getInjectedClassNameType - Return the unique reference to the
/// injected class name type for the specified templated declaration.
QualType ASTContext::getInjectedClassNameType(CXXRecordDecl *Decl,
QualType TST) const {
assert(NeedsInjectedClassNameType(Decl));
if (Decl->TypeForDecl) {
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else if (CXXRecordDecl *PrevDecl = Decl->getPreviousDecl()) {
assert(PrevDecl->TypeForDecl && "previous declaration has no type");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
assert(isa<InjectedClassNameType>(Decl->TypeForDecl));
} else {
Type *newType =
new (*this, TypeAlignment) InjectedClassNameType(Decl, TST);
Decl->TypeForDecl = newType;
Types.push_back(newType);
}
return QualType(Decl->TypeForDecl, 0);
}
/// getTypeDeclType - Return the unique reference to the type for the
/// specified type declaration.
QualType ASTContext::getTypeDeclTypeSlow(const TypeDecl *Decl) const {
assert(Decl && "Passed null for Decl param");
assert(!Decl->TypeForDecl && "TypeForDecl present in slow case");
if (const auto *Typedef = dyn_cast<TypedefNameDecl>(Decl))
return getTypedefType(Typedef);
assert(!isa<TemplateTypeParmDecl>(Decl) &&
"Template type parameter types are always available.");
if (const auto *Record = dyn_cast<RecordDecl>(Decl)) {
assert(Record->isFirstDecl() && "struct/union has previous declaration");
assert(!NeedsInjectedClassNameType(Record));
return getRecordType(Record);
} else if (const auto *Enum = dyn_cast<EnumDecl>(Decl)) {
assert(Enum->isFirstDecl() && "enum has previous declaration");
return getEnumType(Enum);
} else if (const auto *Using = dyn_cast<UnresolvedUsingTypenameDecl>(Decl)) {
Type *newType = new (*this, TypeAlignment) UnresolvedUsingType(Using);
Decl->TypeForDecl = newType;
Types.push_back(newType);
} else
llvm_unreachable("TypeDecl without a type?");
return QualType(Decl->TypeForDecl, 0);
}
/// getTypedefType - Return the unique reference to the type for the
/// specified typedef name decl.
QualType ASTContext::getTypedefType(const TypedefNameDecl *Decl,
QualType Underlying) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (Underlying.isNull())
Underlying = Decl->getUnderlyingType();
QualType Canonical = getCanonicalType(Underlying);
auto *newType = new (*this, TypeAlignment)
TypedefType(Type::Typedef, Decl, Underlying, Canonical);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getRecordType(const RecordDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const RecordDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) RecordType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getEnumType(const EnumDecl *Decl) const {
if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0);
if (const EnumDecl *PrevDecl = Decl->getPreviousDecl())
if (PrevDecl->TypeForDecl)
return QualType(Decl->TypeForDecl = PrevDecl->TypeForDecl, 0);
auto *newType = new (*this, TypeAlignment) EnumType(Decl);
Decl->TypeForDecl = newType;
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getAttributedType(attr::Kind attrKind,
QualType modifiedType,
QualType equivalentType) {
llvm::FoldingSetNodeID id;
AttributedType::Profile(id, attrKind, modifiedType, equivalentType);
void *insertPos = nullptr;
AttributedType *type = AttributedTypes.FindNodeOrInsertPos(id, insertPos);
if (type) return QualType(type, 0);
QualType canon = getCanonicalType(equivalentType);
type = new (*this, TypeAlignment)
AttributedType(canon, attrKind, modifiedType, equivalentType);
Types.push_back(type);
AttributedTypes.InsertNode(type, insertPos);
return QualType(type, 0);
}
/// Retrieve a substitution-result type.
QualType
ASTContext::getSubstTemplateTypeParmType(const TemplateTypeParmType *Parm,
QualType Replacement) const {
assert(Replacement.isCanonical()
&& "replacement types must always be canonical");
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmType::Profile(ID, Parm, Replacement);
void *InsertPos = nullptr;
SubstTemplateTypeParmType *SubstParm
= SubstTemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!SubstParm) {
SubstParm = new (*this, TypeAlignment)
SubstTemplateTypeParmType(Parm, Replacement);
Types.push_back(SubstParm);
SubstTemplateTypeParmTypes.InsertNode(SubstParm, InsertPos);
}
return QualType(SubstParm, 0);
}
/// Retrieve a substitution-result type for a template type parameter pack.
QualType ASTContext::getSubstTemplateTypeParmPackType(
const TemplateTypeParmType *Parm,
const TemplateArgument &ArgPack) {
#ifndef NDEBUG
for (const auto &P : ArgPack.pack_elements()) {
assert(P.getKind() == TemplateArgument::Type &&"Pack contains a non-type");
assert(P.getAsType().isCanonical() && "Pack contains non-canonical type");
}
#endif
llvm::FoldingSetNodeID ID;
SubstTemplateTypeParmPackType::Profile(ID, Parm, ArgPack);
void *InsertPos = nullptr;
if (SubstTemplateTypeParmPackType *SubstParm
= SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(SubstParm, 0);
QualType Canon;
if (!Parm->isCanonicalUnqualified()) {
Canon = getCanonicalType(QualType(Parm, 0));
Canon = getSubstTemplateTypeParmPackType(cast<TemplateTypeParmType>(Canon),
ArgPack);
SubstTemplateTypeParmPackTypes.FindNodeOrInsertPos(ID, InsertPos);
}
auto *SubstParm
= new (*this, TypeAlignment) SubstTemplateTypeParmPackType(Parm, Canon,
ArgPack);
Types.push_back(SubstParm);
SubstTemplateTypeParmPackTypes.InsertNode(SubstParm, InsertPos);
return QualType(SubstParm, 0);
}
/// Retrieve the template type parameter type for a template
/// parameter or parameter pack with the given depth, index, and (optionally)
/// name.
QualType ASTContext::getTemplateTypeParmType(unsigned Depth, unsigned Index,
bool ParameterPack,
TemplateTypeParmDecl *TTPDecl) const {
llvm::FoldingSetNodeID ID;
TemplateTypeParmType::Profile(ID, Depth, Index, ParameterPack, TTPDecl);
void *InsertPos = nullptr;
TemplateTypeParmType *TypeParm
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
if (TypeParm)
return QualType(TypeParm, 0);
if (TTPDecl) {
QualType Canon = getTemplateTypeParmType(Depth, Index, ParameterPack);
TypeParm = new (*this, TypeAlignment) TemplateTypeParmType(TTPDecl, Canon);
TemplateTypeParmType *TypeCheck
= TemplateTypeParmTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!TypeCheck && "Template type parameter canonical type broken");
(void)TypeCheck;
} else
TypeParm = new (*this, TypeAlignment)
TemplateTypeParmType(Depth, Index, ParameterPack);
Types.push_back(TypeParm);
TemplateTypeParmTypes.InsertNode(TypeParm, InsertPos);
return QualType(TypeParm, 0);
}
TypeSourceInfo *
ASTContext::getTemplateSpecializationTypeInfo(TemplateName Name,
SourceLocation NameLoc,
const TemplateArgumentListInfo &Args,
QualType Underlying) const {
assert(!Name.getAsDependentTemplateName() &&
"No dependent template names here!");
QualType TST = getTemplateSpecializationType(Name, Args, Underlying);
TypeSourceInfo *DI = CreateTypeSourceInfo(TST);
TemplateSpecializationTypeLoc TL =
DI->getTypeLoc().castAs<TemplateSpecializationTypeLoc>();
TL.setTemplateKeywordLoc(SourceLocation());
TL.setTemplateNameLoc(NameLoc);
TL.setLAngleLoc(Args.getLAngleLoc());
TL.setRAngleLoc(Args.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(i, Args[i].getLocInfo());
return DI;
}
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
const TemplateArgumentListInfo &Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
SmallVector<TemplateArgument, 4> ArgVec;
ArgVec.reserve(Args.size());
for (const TemplateArgumentLoc &Arg : Args.arguments())
ArgVec.push_back(Arg.getArgument());
return getTemplateSpecializationType(Template, ArgVec, Underlying);
}
#ifndef NDEBUG
static bool hasAnyPackExpansions(ArrayRef<TemplateArgument> Args) {
for (const TemplateArgument &Arg : Args)
if (Arg.isPackExpansion())
return true;
return false;
}
#endif
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
ArrayRef<TemplateArgument> Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = TemplateName(QTN->getTemplateDecl());
bool IsTypeAlias =
Template.getAsTemplateDecl() &&
isa<TypeAliasTemplateDecl>(Template.getAsTemplateDecl());
QualType CanonType;
if (!Underlying.isNull())
CanonType = getCanonicalType(Underlying);
else {
// We can get here with an alias template when the specialization contains
// a pack expansion that does not match up with a parameter pack.
assert((!IsTypeAlias || hasAnyPackExpansions(Args)) &&
"Caller must compute aliased type");
IsTypeAlias = false;
CanonType = getCanonicalTemplateSpecializationType(Template, Args);
}
// Allocate the (non-canonical) template specialization type, but don't
// try to unique it: these types typically have location information that
// we don't unique and don't want to lose.
void *Mem = Allocate(sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * Args.size() +
(IsTypeAlias? sizeof(QualType) : 0),
TypeAlignment);
auto *Spec
= new (Mem) TemplateSpecializationType(Template, Args, CanonType,
IsTypeAlias ? Underlying : QualType());
Types.push_back(Spec);
return QualType(Spec, 0);
}
QualType ASTContext::getCanonicalTemplateSpecializationType(
TemplateName Template, ArrayRef<TemplateArgument> Args) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
// Look through qualified template names.
if (QualifiedTemplateName *QTN = Template.getAsQualifiedTemplateName())
Template = TemplateName(QTN->getTemplateDecl());
// Build the canonical template specialization type.
TemplateName CanonTemplate = getCanonicalTemplateName(Template);
SmallVector<TemplateArgument, 4> CanonArgs;
unsigned NumArgs = Args.size();
CanonArgs.reserve(NumArgs);
for (const TemplateArgument &Arg : Args)
CanonArgs.push_back(getCanonicalTemplateArgument(Arg));
// Determine whether this canonical template specialization type already
// exists.
llvm::FoldingSetNodeID ID;
TemplateSpecializationType::Profile(ID, CanonTemplate,
CanonArgs, *this);
void *InsertPos = nullptr;
TemplateSpecializationType *Spec
= TemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Spec) {
// Allocate a new canonical template specialization type.
void *Mem = Allocate((sizeof(TemplateSpecializationType) +
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
Spec = new (Mem) TemplateSpecializationType(CanonTemplate,
CanonArgs,
QualType(), QualType());
Types.push_back(Spec);
TemplateSpecializationTypes.InsertNode(Spec, InsertPos);
}
assert(Spec->isDependentType() &&
"Non-dependent template-id type must have a canonical type");
return QualType(Spec, 0);
}
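// Sketch: within a template, two spellings of the same dependent
// specialization, e.g. 'X<typename T::type>' written directly and through a
// typedef for the argument, canonicalize here to one uniqued node keyed on the
// canonical template name and canonical arguments; non-dependent template-ids
// must already carry a canonical type, as the preceding assert notes.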
QualType ASTContext::getElaboratedType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
QualType NamedType,
TagDecl *OwnedTagDecl) const {
llvm::FoldingSetNodeID ID;
ElaboratedType::Profile(ID, Keyword, NNS, NamedType, OwnedTagDecl);
void *InsertPos = nullptr;
ElaboratedType *T = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = NamedType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(NamedType);
ElaboratedType *CheckT = ElaboratedTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Elaborated canonical type broken");
(void)CheckT;
}
void *Mem = Allocate(ElaboratedType::totalSizeToAlloc<TagDecl *>(!!OwnedTagDecl),
TypeAlignment);
T = new (Mem) ElaboratedType(Keyword, NNS, NamedType, Canon, OwnedTagDecl);
Types.push_back(T);
ElaboratedTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getParenType(QualType InnerType) const {
llvm::FoldingSetNodeID ID;
ParenType::Profile(ID, InnerType);
void *InsertPos = nullptr;
ParenType *T = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon = InnerType;
if (!Canon.isCanonical()) {
Canon = getCanonicalType(InnerType);
ParenType *CheckT = ParenTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckT && "Paren canonical type broken");
(void)CheckT;
}
T = new (*this, TypeAlignment) ParenType(InnerType, Canon);
Types.push_back(T);
ParenTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getMacroQualifiedType(QualType UnderlyingTy,
const IdentifierInfo *MacroII) const {
QualType Canon = UnderlyingTy;
if (!Canon.isCanonical())
Canon = getCanonicalType(UnderlyingTy);
auto *newType = new (*this, TypeAlignment)
MacroQualifiedType(UnderlyingTy, Canon, MacroII);
Types.push_back(newType);
return QualType(newType, 0);
}
QualType ASTContext::getDependentNameType(ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
QualType Canon) const {
if (Canon.isNull()) {
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS != NNS)
Canon = getDependentNameType(Keyword, CanonNNS, Name);
}
llvm::FoldingSetNodeID ID;
DependentNameType::Profile(ID, Keyword, NNS, Name);
void *InsertPos = nullptr;
DependentNameType *T
= DependentNameTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
T = new (*this, TypeAlignment) DependentNameType(Keyword, NNS, Name, Canon);
Types.push_back(T);
DependentNameTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
QualType
ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
const TemplateArgumentListInfo &Args) const {
// TODO: avoid this copy
SmallVector<TemplateArgument, 16> ArgCopy;
for (unsigned I = 0, E = Args.size(); I != E; ++I)
ArgCopy.push_back(Args[I].getArgument());
return getDependentTemplateSpecializationType(Keyword, NNS, Name, ArgCopy);
}
QualType
ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
ArrayRef<TemplateArgument> Args) const {
assert((!NNS || NNS->isDependent()) &&
"nested-name-specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateSpecializationType::Profile(ID, *this, Keyword, NNS,
Name, Args);
void *InsertPos = nullptr;
DependentTemplateSpecializationType *T
= DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
ElaboratedTypeKeyword CanonKeyword = Keyword;
if (Keyword == ETK_None) CanonKeyword = ETK_Typename;
bool AnyNonCanonArgs = false;
unsigned NumArgs = Args.size();
SmallVector<TemplateArgument, 16> CanonArgs(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I) {
CanonArgs[I] = getCanonicalTemplateArgument(Args[I]);
if (!CanonArgs[I].structurallyEquals(Args[I]))
AnyNonCanonArgs = true;
}
QualType Canon;
if (AnyNonCanonArgs || CanonNNS != NNS || CanonKeyword != Keyword) {
Canon = getDependentTemplateSpecializationType(CanonKeyword, CanonNNS,
Name,
CanonArgs);
// Find the insert position again.
DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
}
void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) +
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
T = new (Mem) DependentTemplateSpecializationType(Keyword, NNS,
Name, Args, Canon);
Types.push_back(T);
DependentTemplateSpecializationTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) {
TemplateArgument Arg;
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
QualType ArgType = getTypeDeclType(TTP);
if (TTP->isParameterPack())
ArgType = getPackExpansionType(ArgType, None);
Arg = TemplateArgument(ArgType);
} else if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
QualType T =
NTTP->getType().getNonPackExpansionType().getNonLValueExprType(*this);
// For class NTTPs, ensure we include the 'const' so the type matches that
// of a real template argument.
// FIXME: It would be more faithful to model this as something like an
// lvalue-to-rvalue conversion applied to a const-qualified lvalue.
if (T->isRecordType())
T.addConst();
Expr *E = new (*this) DeclRefExpr(
*this, NTTP, /*enclosing*/ false, T,
Expr::getValueKindForType(NTTP->getType()), NTTP->getLocation());
if (NTTP->isParameterPack())
E = new (*this) PackExpansionExpr(DependentTy, E, NTTP->getLocation(),
None);
Arg = TemplateArgument(E);
} else {
auto *TTP = cast<TemplateTemplateParmDecl>(Param);
if (TTP->isParameterPack())
Arg = TemplateArgument(TemplateName(TTP), Optional<unsigned>());
else
Arg = TemplateArgument(TemplateName(TTP));
}
if (Param->isTemplateParameterPack())
Arg = TemplateArgument::CreatePackCopy(*this, Arg);
return Arg;
}
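// Sketch: for 'template <typename T, int N, template <class> class TT>', the
// injected arguments are the parameter type T itself, a DeclRefExpr naming N,
// and the template name TT; parameter packs are additionally wrapped so the
// injected argument expands over the whole pack.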
void
ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params,
SmallVectorImpl<TemplateArgument> &Args) {
Args.reserve(Args.size() + Params->size());
for (NamedDecl *Param : *Params)
Args.push_back(getInjectedTemplateArg(Param));
}
QualType ASTContext::getPackExpansionType(QualType Pattern,
Optional<unsigned> NumExpansions,
bool ExpectPackInType) {
assert((!ExpectPackInType || Pattern->containsUnexpandedParameterPack()) &&
"Pack expansions must expand one or more parameter packs");
llvm::FoldingSetNodeID ID;
PackExpansionType::Profile(ID, Pattern, NumExpansions);
void *InsertPos = nullptr;
PackExpansionType *T = PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
if (T)
return QualType(T, 0);
QualType Canon;
if (!Pattern.isCanonical()) {
Canon = getPackExpansionType(getCanonicalType(Pattern), NumExpansions,
/*ExpectPackInType=*/false);
// Find the insert position again, in case we inserted an element into
// PackExpansionTypes and invalidated our insert position.
PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos);
}
T = new (*this, TypeAlignment)
PackExpansionType(Pattern, Canon, NumExpansions);
Types.push_back(T);
PackExpansionTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
/// CmpProtocolNames - Comparison predicate for sorting protocols
/// alphabetically.
static int CmpProtocolNames(ObjCProtocolDecl *const *LHS,
ObjCProtocolDecl *const *RHS) {
return DeclarationName::compare((*LHS)->getDeclName(), (*RHS)->getDeclName());
}
static bool areSortedAndUniqued(ArrayRef<ObjCProtocolDecl *> Protocols) {
if (Protocols.empty()) return true;
if (Protocols[0]->getCanonicalDecl() != Protocols[0])
return false;
for (unsigned i = 1; i != Protocols.size(); ++i)
if (CmpProtocolNames(&Protocols[i - 1], &Protocols[i]) >= 0 ||
Protocols[i]->getCanonicalDecl() != Protocols[i])
return false;
return true;
}
static void
SortAndUniqueProtocols(SmallVectorImpl<ObjCProtocolDecl *> &Protocols) {
// Sort protocols, keyed by name.
llvm::array_pod_sort(Protocols.begin(), Protocols.end(), CmpProtocolNames);
// Canonicalize.
for (ObjCProtocolDecl *&P : Protocols)
P = P->getCanonicalDecl();
// Remove duplicates.
auto ProtocolsEnd = std::unique(Protocols.begin(), Protocols.end());
Protocols.erase(ProtocolsEnd, Protocols.end());
}
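// Sketch: a written qualifier list such as <NSCopying, NSCoding, NSCopying>
// canonicalizes through the helper above to the sorted, deduplicated list
// <NSCoding, NSCopying>, with each entry replaced by its canonical
// declaration.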
QualType ASTContext::getObjCObjectType(QualType BaseType,
ObjCProtocolDecl * const *Protocols,
unsigned NumProtocols) const {
return getObjCObjectType(BaseType, {},
llvm::makeArrayRef(Protocols, NumProtocols),
/*isKindOf=*/false);
}
QualType ASTContext::getObjCObjectType(
QualType baseType,
ArrayRef<QualType> typeArgs,
ArrayRef<ObjCProtocolDecl *> protocols,
bool isKindOf) const {
// If the base type is an interface and there aren't any protocols or
// type arguments to add, then the interface type will do just fine.
if (typeArgs.empty() && protocols.empty() && !isKindOf &&
isa<ObjCInterfaceType>(baseType))
return baseType;
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCObjectTypeImpl::Profile(ID, baseType, typeArgs, protocols, isKindOf);
void *InsertPos = nullptr;
if (ObjCObjectType *QT = ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Determine the type arguments to be used for canonicalization,
// which may be explicitly specified here or written on the base
// type.
ArrayRef<QualType> effectiveTypeArgs = typeArgs;
if (effectiveTypeArgs.empty()) {
if (const auto *baseObject = baseType->getAs<ObjCObjectType>())
effectiveTypeArgs = baseObject->getTypeArgs();
}
// Build the canonical type, which has the canonical base type and a
// sorted-and-uniqued list of protocols and the type arguments
// canonicalized.
QualType canonical;
bool typeArgsAreCanonical = std::all_of(effectiveTypeArgs.begin(),
effectiveTypeArgs.end(),
[&](QualType type) {
return type.isCanonical();
});
bool protocolsSorted = areSortedAndUniqued(protocols);
if (!typeArgsAreCanonical || !protocolsSorted || !baseType.isCanonical()) {
// Determine the canonical type arguments.
ArrayRef<QualType> canonTypeArgs;
SmallVector<QualType, 4> canonTypeArgsVec;
if (!typeArgsAreCanonical) {
canonTypeArgsVec.reserve(effectiveTypeArgs.size());
for (auto typeArg : effectiveTypeArgs)
canonTypeArgsVec.push_back(getCanonicalType(typeArg));
canonTypeArgs = canonTypeArgsVec;
} else {
canonTypeArgs = effectiveTypeArgs;
}
ArrayRef<ObjCProtocolDecl *> canonProtocols;
SmallVector<ObjCProtocolDecl*, 8> canonProtocolsVec;
if (!protocolsSorted) {
canonProtocolsVec.append(protocols.begin(), protocols.end());
SortAndUniqueProtocols(canonProtocolsVec);
canonProtocols = canonProtocolsVec;
} else {
canonProtocols = protocols;
}
canonical = getObjCObjectType(getCanonicalType(baseType), canonTypeArgs,
canonProtocols, isKindOf);
// Regenerate InsertPos.
ObjCObjectTypes.FindNodeOrInsertPos(ID, InsertPos);
}
unsigned size = sizeof(ObjCObjectTypeImpl);
size += typeArgs.size() * sizeof(QualType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *T =
new (mem) ObjCObjectTypeImpl(canonical, baseType, typeArgs, protocols,
isKindOf);
Types.push_back(T);
ObjCObjectTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
}
/// Apply Objective-C protocol qualifiers to the given type.
/// If this is for the canonical type of a type parameter, we can apply
/// protocol qualifiers on the ObjCObjectPointerType.
QualType
ASTContext::applyObjCProtocolQualifiers(QualType type,
ArrayRef<ObjCProtocolDecl *> protocols, bool &hasError,
bool allowOnPointerType) const {
hasError = false;
if (const auto *objT = dyn_cast<ObjCTypeParamType>(type.getTypePtr())) {
return getObjCTypeParamType(objT->getDecl(), protocols);
}
// Apply protocol qualifiers to ObjCObjectPointerType.
if (allowOnPointerType) {
if (const auto *objPtr =
dyn_cast<ObjCObjectPointerType>(type.getTypePtr())) {
const ObjCObjectType *objT = objPtr->getObjectType();
// Merge protocol lists and construct ObjCObjectType.
SmallVector<ObjCProtocolDecl*, 8> protocolsVec;
protocolsVec.append(objT->qual_begin(),
objT->qual_end());
protocolsVec.append(protocols.begin(), protocols.end());
ArrayRef<ObjCProtocolDecl *> protocols = protocolsVec;
type = getObjCObjectType(
objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
return getObjCObjectPointerType(type);
}
}
// Apply protocol qualifiers to ObjCObjectType.
if (const auto *objT = dyn_cast<ObjCObjectType>(type.getTypePtr())){
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(objT->getBaseType(),
objT->getTypeArgsAsWritten(),
protocols,
objT->isKindOfTypeAsWritten());
}
// If the canonical type is ObjCObjectType, ...
if (type->isObjCObjectType()) {
// Silently overwrite any existing protocol qualifiers.
// TODO: determine whether that's the right thing to do.
// FIXME: Check for protocols to which the class type is already
// known to conform.
return getObjCObjectType(type, {}, protocols, false);
}
// id<protocol-list>
if (type->isObjCIdType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinIdTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
// Class<protocol-list>
if (type->isObjCClassType()) {
const auto *objPtr = type->castAs<ObjCObjectPointerType>();
type = getObjCObjectType(ObjCBuiltinClassTy, {}, protocols,
objPtr->isKindOfType());
return getObjCObjectPointerType(type);
}
hasError = true;
return type;
}
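// Example (illustrative): applying the single protocol NSCopying to the
// builtin type 'id' yields the pointer type 'id<NSCopying>', while applying
// it to the interface type 'NSObject' yields the object type
// 'NSObject<NSCopying>'.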
QualType
ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl,
ArrayRef<ObjCProtocolDecl *> protocols) const {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
ObjCTypeParamType::Profile(ID, Decl, Decl->getUnderlyingType(), protocols);
void *InsertPos = nullptr;
if (ObjCTypeParamType *TypeParam =
ObjCTypeParamTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(TypeParam, 0);
// We canonicalize to the underlying type.
QualType Canonical = getCanonicalType(Decl->getUnderlyingType());
if (!protocols.empty()) {
// Apply the protocol qualifiers.
bool hasError;
Canonical = getCanonicalType(applyObjCProtocolQualifiers(
Canonical, protocols, hasError, true /*allowOnPointerType*/));
assert(!hasError && "Error when applying protocol qualifiers to bound type");
}
unsigned size = sizeof(ObjCTypeParamType);
size += protocols.size() * sizeof(ObjCProtocolDecl *);
void *mem = Allocate(size, TypeAlignment);
auto *newType = new (mem) ObjCTypeParamType(Decl, Canonical, protocols);
Types.push_back(newType);
ObjCTypeParamTypes.InsertNode(newType, InsertPos);
return QualType(newType, 0);
}
void ASTContext::adjustObjCTypeParamBoundType(const ObjCTypeParamDecl *Orig,
ObjCTypeParamDecl *New) const {
New->setTypeSourceInfo(getTrivialTypeSourceInfo(Orig->getUnderlyingType()));
// Update TypeForDecl after updating TypeSourceInfo.
auto NewTypeParamTy = cast<ObjCTypeParamType>(New->getTypeForDecl());
SmallVector<ObjCProtocolDecl *, 8> protocols;
protocols.append(NewTypeParamTy->qual_begin(), NewTypeParamTy->qual_end());
QualType UpdatedTy = getObjCTypeParamType(New, protocols);
New->setTypeForDecl(UpdatedTy.getTypePtr());
}
/// ObjCObjectAdoptsQTypeProtocols - Checks that protocols in IC's
/// protocol list adopt all protocols in QT's qualified-id protocol
/// list.
bool ASTContext::ObjCObjectAdoptsQTypeProtocols(QualType QT,
ObjCInterfaceDecl *IC) {
if (!QT->isObjCQualifiedIdType())
return false;
if (const auto *OPT = QT->getAs<ObjCObjectPointerType>()) {
// If both the right and left sides have qualifiers.
for (auto *Proto : OPT->quals()) {
if (!IC->ClassImplementsProtocol(Proto, false))
return false;
}
return true;
}
return false;
}
/// QIdProtocolsAdoptObjCObjectProtocols - Checks that protocols in
/// QT's qualified-id protocol list adopt all protocols in IDecl's list
/// of protocols.
bool ASTContext::QIdProtocolsAdoptObjCObjectProtocols(QualType QT,
ObjCInterfaceDecl *IDecl) {
if (!QT->isObjCQualifiedIdType())
return false;
const auto *OPT = QT->getAs<ObjCObjectPointerType>();
if (!OPT)
return false;
if (!IDecl->hasDefinition())
return false;
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> InheritedProtocols;
CollectInheritedProtocols(IDecl, InheritedProtocols);
if (InheritedProtocols.empty())
return false;
// If every protocol in QT's id<plist> protocol list conforms to some
// protocol inherited by IDecl, then bridge casting is OK.
bool Conforms = false;
for (auto *Proto : OPT->quals()) {
Conforms = false;
for (auto *PI : InheritedProtocols) {
if (ProtocolCompatibleWithProtocol(Proto, PI)) {
Conforms = true;
break;
}
}
if (!Conforms)
break;
}
if (Conforms)
return true;
for (auto *PI : InheritedProtocols) {
// If both the right and left sides have qualifiers.
bool Adopts = false;
for (auto *Proto : OPT->quals()) {
// return 'true' if 'PI' is in the inheritance hierarchy of Proto
if ((Adopts = ProtocolCompatibleWithProtocol(PI, Proto)))
break;
}
if (!Adopts)
return false;
}
return true;
}
/// getObjCObjectPointerType - Return a ObjCObjectPointerType type for
/// the given object type.
QualType ASTContext::getObjCObjectPointerType(QualType ObjectT) const {
llvm::FoldingSetNodeID ID;
ObjCObjectPointerType::Profile(ID, ObjectT);
void *InsertPos = nullptr;
if (ObjCObjectPointerType *QT =
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(QT, 0);
// Find the canonical object type.
QualType Canonical;
if (!ObjectT.isCanonical()) {
Canonical = getObjCObjectPointerType(getCanonicalType(ObjectT));
// Regenerate InsertPos.
ObjCObjectPointerTypes.FindNodeOrInsertPos(ID, InsertPos);
}
// No match.
void *Mem = Allocate(sizeof(ObjCObjectPointerType), TypeAlignment);
auto *QType =
new (Mem) ObjCObjectPointerType(Canonical, ObjectT);
Types.push_back(QType);
ObjCObjectPointerTypes.InsertNode(QType, InsertPos);
return QualType(QType, 0);
}
/// getObjCInterfaceType - Return the unique reference to the type for the
/// specified ObjC interface decl. The list of protocols is optional.
QualType ASTContext::getObjCInterfaceType(const ObjCInterfaceDecl *Decl,
ObjCInterfaceDecl *PrevDecl) const {
if (Decl->TypeForDecl)
return QualType(Decl->TypeForDecl, 0);
if (PrevDecl) {
assert(PrevDecl->TypeForDecl && "previous decl has no TypeForDecl");
Decl->TypeForDecl = PrevDecl->TypeForDecl;
return QualType(PrevDecl->TypeForDecl, 0);
}
// Prefer the definition, if there is one.
if (const ObjCInterfaceDecl *Def = Decl->getDefinition())
Decl = Def;
void *Mem = Allocate(sizeof(ObjCInterfaceType), TypeAlignment);
auto *T = new (Mem) ObjCInterfaceType(Decl);
Decl->TypeForDecl = T;
Types.push_back(T);
return QualType(T, 0);
}
/// getTypeOfExprType - Unlike many "get<Type>" functions, we can't unique
/// TypeOfExprType ASTs (since expressions are never shared). For example,
/// multiple declarations that refer to "typeof(x)" all contain different
/// DeclRefExprs. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfExprType(Expr *tofExpr) const {
TypeOfExprType *toe;
if (tofExpr->isTypeDependent()) {
llvm::FoldingSetNodeID ID;
DependentTypeOfExprType::Profile(ID, *this, tofExpr);
void *InsertPos = nullptr;
DependentTypeOfExprType *Canon
= DependentTypeOfExprTypes.FindNodeOrInsertPos(ID, InsertPos);
if (Canon) {
// We already have a "canonical" version of an identical, dependent
// typeof(expr) type. Use that as our canonical type.
toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr,
QualType((TypeOfExprType*)Canon, 0));
} else {
// Build a new, canonical typeof(expr) type.
Canon
= new (*this, TypeAlignment) DependentTypeOfExprType(*this, tofExpr);
DependentTypeOfExprTypes.InsertNode(Canon, InsertPos);
toe = Canon;
}
} else {
QualType Canonical = getCanonicalType(tofExpr->getType());
toe = new (*this, TypeAlignment) TypeOfExprType(tofExpr, Canonical);
}
Types.push_back(toe);
return QualType(toe, 0);
}
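// Example (illustrative): in GNU C, the two declarations
//   typeof(x) a;  typeof(x) b;
// produce two distinct TypeOfExprType nodes (each wrapping its own
// DeclRefExpr for 'x'), but both share the canonical type of 'x', so the
// type checker still treats 'a' and 'b' as having the same type.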
/// getTypeOfType - Unlike many "get<Type>" functions, we don't unique
/// TypeOfType nodes. The only motivation to unique these nodes would be
/// memory savings. Since typeof(t) is fairly uncommon, space shouldn't be
/// an issue. This doesn't affect the type checker, since it operates
/// on canonical types (which are always unique).
QualType ASTContext::getTypeOfType(QualType tofType) const {
QualType Canonical = getCanonicalType(tofType);
auto *tot = new (*this, TypeAlignment) TypeOfType(tofType, Canonical);
Types.push_back(tot);
return QualType(tot, 0);
}
/// Unlike many "get<Type>" functions, we don't unique DecltypeType
/// nodes. This would never be helpful, since each such type has its own
/// expression, and would not give a significant memory saving, since there
/// is an Expr tree under each such type.
QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const {
DecltypeType *dt;
// C++11 [temp.type]p2:
// If an expression e involves a template parameter, decltype(e) denotes a
// unique dependent type. Two such decltype-specifiers refer to the same
// type only if their expressions are equivalent (14.5.6.1).
if (e->isInstantiationDependent()) {
llvm::FoldingSetNodeID ID;
DependentDecltypeType::Profile(ID, *this, e);
void *InsertPos = nullptr;
DependentDecltypeType *Canon
= DependentDecltypeTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical decltype(expr) type.
Canon = new (*this, TypeAlignment) DependentDecltypeType(*this, e);
DependentDecltypeTypes.InsertNode(Canon, InsertPos);
}
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, QualType((DecltypeType *)Canon, 0));
} else {
dt = new (*this, TypeAlignment)
DecltypeType(e, UnderlyingType, getCanonicalType(UnderlyingType));
}
Types.push_back(dt);
return QualType(dt, 0);
}
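// Example (illustrative): inside 'template <typename T> void f(T t)', the
// type 'decltype(t + t)' is instantiation-dependent, so it is canonicalized
// through a uniqued DependentDecltypeType; two occurrences of equivalent
// expressions denote the same canonical type, per C++11 [temp.type]p2.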
/// getUnaryTransformationType - We don't unique these, since the memory
/// savings are minimal and these are rare.
QualType ASTContext::getUnaryTransformType(QualType BaseType,
QualType UnderlyingType,
UnaryTransformType::UTTKind Kind)
const {
UnaryTransformType *ut = nullptr;
if (BaseType->isDependentType()) {
// Look in the folding set for an existing type.
llvm::FoldingSetNodeID ID;
DependentUnaryTransformType::Profile(ID, getCanonicalType(BaseType), Kind);
void *InsertPos = nullptr;
DependentUnaryTransformType *Canon
= DependentUnaryTransformTypes.FindNodeOrInsertPos(ID, InsertPos);
if (!Canon) {
// Build a new, canonical __underlying_type(type) type.
Canon = new (*this, TypeAlignment)
DependentUnaryTransformType(*this, getCanonicalType(BaseType),
Kind);
DependentUnaryTransformTypes.InsertNode(Canon, InsertPos);
}
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
QualType(), Kind,
QualType(Canon, 0));
} else {
QualType CanonType = getCanonicalType(UnderlyingType);
ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
UnderlyingType, Kind,
CanonType);
}
Types.push_back(ut);
return QualType(ut, 0);
}
/// getAutoType - Return the uniqued reference to the 'auto' type which has been
/// deduced to the given type, or to the canonical undeduced 'auto' type, or the
/// canonical deduced-but-dependent 'auto' type.
QualType
ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
bool IsDependent, bool IsPack,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs) const {
assert((!IsPack || IsDependent) && "only use IsPack for a dependent pack");
if (DeducedType.isNull() && Keyword == AutoTypeKeyword::Auto &&
!TypeConstraintConcept && !IsDependent)
return getAutoDeductType();
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
AutoType::Profile(ID, *this, DeducedType, Keyword, IsDependent,
TypeConstraintConcept, TypeConstraintArgs);
if (AutoType *AT = AutoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
void *Mem = Allocate(sizeof(AutoType) +
sizeof(TemplateArgument) * TypeConstraintArgs.size(),
TypeAlignment);
auto *AT = new (Mem) AutoType(
DeducedType, Keyword,
(IsDependent ? TypeDependence::DependentInstantiation
: TypeDependence::None) |
(IsPack ? TypeDependence::UnexpandedPack : TypeDependence::None),
TypeConstraintConcept, TypeConstraintArgs);
Types.push_back(AT);
if (InsertPos)
AutoTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
/// Return the uniqued reference to the deduced template specialization type
/// which has been deduced to the given type, or to the canonical undeduced
/// such type, or the canonical deduced-but-dependent such type.
QualType ASTContext::getDeducedTemplateSpecializationType(
TemplateName Template, QualType DeducedType, bool IsDependent) const {
// Look in the folding set for an existing type.
void *InsertPos = nullptr;
llvm::FoldingSetNodeID ID;
DeducedTemplateSpecializationType::Profile(ID, Template, DeducedType,
IsDependent);
if (DeducedTemplateSpecializationType *DTST =
DeducedTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(DTST, 0);
auto *DTST = new (*this, TypeAlignment)
DeducedTemplateSpecializationType(Template, DeducedType, IsDependent);
Types.push_back(DTST);
if (InsertPos)
DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos);
return QualType(DTST, 0);
}
/// getAtomicType - Return the uniqued reference to the atomic type for
/// the given value type.
QualType ASTContext::getAtomicType(QualType T) const {
// Unique pointers, to guarantee there is only one pointer of a particular
// structure.
llvm::FoldingSetNodeID ID;
AtomicType::Profile(ID, T);
void *InsertPos = nullptr;
if (AtomicType *AT = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
// If the atomic value type isn't canonical, this won't be a canonical type
// either, so fill in the canonical type field.
QualType Canonical;
if (!T.isCanonical()) {
Canonical = getAtomicType(getCanonicalType(T));
// Get the new insert position for the node we care about.
AtomicType *NewIP = AtomicTypes.FindNodeOrInsertPos(ID, InsertPos);
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
auto *New = new (*this, TypeAlignment) AtomicType(T, Canonical);
Types.push_back(New);
AtomicTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
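// Example (illustrative): given 'typedef int MyInt;', getAtomicType(MyInt)
// returns an AtomicType whose written value type is the typedef but whose
// canonical type is '_Atomic(int)', built by the recursive call above.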
/// getAutoDeductType - Get type pattern for deducing against 'auto'.
QualType ASTContext::getAutoDeductType() const {
if (AutoDeductTy.isNull())
AutoDeductTy = QualType(new (*this, TypeAlignment)
AutoType(QualType(), AutoTypeKeyword::Auto,
TypeDependence::None,
/*concept*/ nullptr, /*args*/ {}),
0);
return AutoDeductTy;
}
/// getAutoRRefDeductType - Get type pattern for deducing against 'auto &&'.
QualType ASTContext::getAutoRRefDeductType() const {
if (AutoRRefDeductTy.isNull())
AutoRRefDeductTy = getRValueReferenceType(getAutoDeductType());
assert(!AutoRRefDeductTy.isNull() && "can't build 'auto &&' pattern");
return AutoRRefDeductTy;
}
/// getTagDeclType - Return the unique reference to the type for the
/// specified TagDecl (struct/union/class/enum) decl.
QualType ASTContext::getTagDeclType(const TagDecl *Decl) const {
assert(Decl);
// FIXME: What is the design on getTagDeclType when it requires casting
// away const? mutable?
return getTypeDeclType(const_cast<TagDecl*>(Decl));
}
/// getSizeType - Return the unique type for "size_t" (C99 7.17), the result
/// of the sizeof operator (C99 6.5.3.4p4). The value is target dependent and
/// needs to agree with the definition in <stddef.h>.
CanQualType ASTContext::getSizeType() const {
return getFromTargetType(Target->getSizeType());
}
/// Return the unique signed counterpart of the integer type
/// corresponding to size_t.
CanQualType ASTContext::getSignedSizeType() const {
return getFromTargetType(Target->getSignedSizeType());
}
/// getIntMaxType - Return the unique type for "intmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getIntMaxType() const {
return getFromTargetType(Target->getIntMaxType());
}
/// getUIntMaxType - Return the unique type for "uintmax_t" (C99 7.18.1.5).
CanQualType ASTContext::getUIntMaxType() const {
return getFromTargetType(Target->getUIntMaxType());
}
/// getSignedWCharType - Return the type of "signed wchar_t".
/// Used in C++ as a GCC extension.
QualType ASTContext::getSignedWCharType() const {
// FIXME: derive from "Target" ?
return WCharTy;
}
/// getUnsignedWCharType - Return the type of "unsigned wchar_t".
/// Used in C++ as a GCC extension.
QualType ASTContext::getUnsignedWCharType() const {
// FIXME: derive from "Target" ?
return UnsignedIntTy;
}
QualType ASTContext::getIntPtrType() const {
return getFromTargetType(Target->getIntPtrType());
}
QualType ASTContext::getUIntPtrType() const {
return getCorrespondingUnsignedType(getIntPtrType());
}
/// getPointerDiffType - Return the unique type for "ptrdiff_t" (C99 7.17)
/// defined in <stddef.h>. Pointer - pointer requires this (C99 6.5.6p9).
QualType ASTContext::getPointerDiffType() const {
return getFromTargetType(Target->getPtrDiffType(0));
}
/// Return the unique unsigned counterpart of "ptrdiff_t"
/// integer type. The standard (C11 7.21.6.1p7) refers to this type
/// in the definition of %tu format specifier.
QualType ASTContext::getUnsignedPointerDiffType() const {
return getFromTargetType(Target->getUnsignedPtrDiffType(0));
}
/// Return the unique type for "pid_t" defined in
/// <sys/types.h>. We need this to compute the correct type for vfork().
QualType ASTContext::getProcessIDType() const {
return getFromTargetType(Target->getProcessIDType());
}
//===----------------------------------------------------------------------===//
// Type Operators
//===----------------------------------------------------------------------===//
CanQualType ASTContext::getCanonicalParamType(QualType T) const {
// Push qualifiers into arrays, and then discard any remaining
// qualifiers.
T = getCanonicalType(T);
T = getVariableArrayDecayedType(T);
const Type *Ty = T.getTypePtr();
QualType Result;
if (isa<ArrayType>(Ty)) {
Result = getArrayDecayedType(QualType(Ty,0));
} else if (isa<FunctionType>(Ty)) {
Result = getPointerType(QualType(Ty, 0));
} else {
Result = QualType(Ty, 0);
}
return CanQualType::CreateUnsafe(Result);
}
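// Example (illustrative): for a parameter written as 'const char name[10]'
// the canonical parameter type is 'const char *', and for a parameter of
// function type 'void ()' it is 'void (*)()'; any remaining top-level
// qualifiers (e.g. a 'const int' parameter) are dropped.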
QualType ASTContext::getUnqualifiedArrayType(QualType type,
Qualifiers &quals) {
SplitQualType splitType = type.getSplitUnqualifiedType();
// FIXME: getSplitUnqualifiedType() actually walks all the way to
// the unqualified desugared type and then drops it on the floor.
// We then have to strip that sugar back off with
// getUnqualifiedDesugaredType(), which is silly.
const auto *AT =
dyn_cast<ArrayType>(splitType.Ty->getUnqualifiedDesugaredType());
// If we don't have an array, just use the results in splitType.
if (!AT) {
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, recurse on the array's element type.
QualType elementType = AT->getElementType();
QualType unqualElementType = getUnqualifiedArrayType(elementType, quals);
// If that didn't change the element type, AT has no qualifiers, so we
// can just use the results in splitType.
if (elementType == unqualElementType) {
assert(quals.empty()); // from the recursive call
quals = splitType.Quals;
return QualType(splitType.Ty, 0);
}
// Otherwise, add in the qualifiers from the outermost type, then
// build the type back up.
quals.addConsistentQualifiers(splitType.Quals);
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
return getConstantArrayType(unqualElementType, CAT->getSize(),
CAT->getSizeExpr(), CAT->getSizeModifier(), 0);
}
if (const auto *IAT = dyn_cast<IncompleteArrayType>(AT)) {
return getIncompleteArrayType(unqualElementType, IAT->getSizeModifier(), 0);
}
if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) {
return getVariableArrayType(unqualElementType,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange());
}
const auto *DSAT = cast<DependentSizedArrayType>(AT);
return getDependentSizedArrayType(unqualElementType, DSAT->getSizeExpr(),
DSAT->getSizeModifier(), 0,
SourceRange());
}
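// Example (illustrative): for 'const volatile int[3][4]' this returns the
// array type 'int[3][4]' and sets 'quals' to {const, volatile}, collecting
// the element qualifiers gathered by the recursion above.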
/// Attempt to unwrap two types that may both be array types with the same bound
/// (or both be array types of unknown bound) for the purpose of comparing the
/// cv-decomposition of two types per C++ [conv.qual].
void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2) {
while (true) {
auto *AT1 = getAsArrayType(T1);
if (!AT1)
return;
auto *AT2 = getAsArrayType(T2);
if (!AT2)
return;
// If we don't have two array types with the same constant bound nor two
// incomplete array types, we've unwrapped everything we can.
if (auto *CAT1 = dyn_cast<ConstantArrayType>(AT1)) {
auto *CAT2 = dyn_cast<ConstantArrayType>(AT2);
if (!CAT2 || CAT1->getSize() != CAT2->getSize())
return;
} else if (!isa<IncompleteArrayType>(AT1) ||
!isa<IncompleteArrayType>(AT2)) {
return;
}
T1 = AT1->getElementType();
T2 = AT2->getElementType();
}
}
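// Example (illustrative): for T1 = 'int[3][5]' and T2 = 'const int[3][5]',
// two iterations strip the matching bounds, leaving T1 = 'int' and
// T2 = 'const int'; for T1 = 'int[3]' and T2 = 'int[4]' the bounds differ,
// so neither type is changed.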
/// Attempt to unwrap two types that may be similar (C++ [conv.qual]).
///
/// If T1 and T2 are both pointer types of the same kind, or both array types
/// with the same bound, unwraps layers from T1 and T2 until a pointer type is
/// unwrapped. Top-level qualifiers on T1 and T2 are ignored.
///
/// This function will typically be called in a loop that successively
/// "unwraps" pointer and pointer-to-member types to compare them at each
/// level.
///
/// \return \c true if a pointer type was unwrapped, \c false if we reached a
/// pair of types that can't be unwrapped further.
bool ASTContext::UnwrapSimilarTypes(QualType &T1, QualType &T2) {
UnwrapSimilarArrayTypes(T1, T2);
const auto *T1PtrType = T1->getAs<PointerType>();
const auto *T2PtrType = T2->getAs<PointerType>();
if (T1PtrType && T2PtrType) {
T1 = T1PtrType->getPointeeType();
T2 = T2PtrType->getPointeeType();
return true;
}
const auto *T1MPType = T1->getAs<MemberPointerType>();
const auto *T2MPType = T2->getAs<MemberPointerType>();
if (T1MPType && T2MPType &&
hasSameUnqualifiedType(QualType(T1MPType->getClass(), 0),
QualType(T2MPType->getClass(), 0))) {
T1 = T1MPType->getPointeeType();
T2 = T2MPType->getPointeeType();
return true;
}
if (getLangOpts().ObjC) {
const auto *T1OPType = T1->getAs<ObjCObjectPointerType>();
const auto *T2OPType = T2->getAs<ObjCObjectPointerType>();
if (T1OPType && T2OPType) {
T1 = T1OPType->getPointeeType();
T2 = T2OPType->getPointeeType();
return true;
}
}
// FIXME: Block pointers, too?
return false;
}
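// Example (illustrative): starting from T1 = 'const int **' and
// T2 = 'int **', the first call peels one pointer level (leaving
// 'const int *' and 'int *'), the second peels another (leaving 'const int'
// and 'int'), and the third returns false, ending the caller's loop.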
bool ASTContext::hasSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals;
T1 = getUnqualifiedArrayType(T1, Quals);
T2 = getUnqualifiedArrayType(T2, Quals);
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2))
return false;
}
}
bool ASTContext::hasCvrSimilarType(QualType T1, QualType T2) {
while (true) {
Qualifiers Quals1, Quals2;
T1 = getUnqualifiedArrayType(T1, Quals1);
T2 = getUnqualifiedArrayType(T2, Quals2);
Quals1.removeCVRQualifiers();
Quals2.removeCVRQualifiers();
if (Quals1 != Quals2)
return false;
if (hasSameType(T1, T2))
return true;
if (!UnwrapSimilarTypes(T1, T2))
return false;
}
}
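// Example (illustrative): 'const int **' and 'int **' are similar (their
// qualifiers may differ at each unwrapped level), whereas 'int *' and
// 'float *' are not, since unwrapping stops at distinct types.
// hasCvrSimilarType differs only in that non-CVR qualifiers (address space,
// Objective-C lifetime, etc.) must match at each level.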
DeclarationNameInfo
ASTContext::getNameForTemplate(TemplateName Name,
SourceLocation NameLoc) const {
switch (Name.getKind()) {
case TemplateName::QualifiedTemplate:
case TemplateName::Template:
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo(Name.getAsTemplateDecl()->getDeclName(),
NameLoc);
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *Storage = Name.getAsOverloadedTemplate();
// DNInfo work in progress: CHECKME: what about DNLoc?
return DeclarationNameInfo((*Storage->begin())->getDeclName(), NameLoc);
}
case TemplateName::AssumedTemplate: {
AssumedTemplateStorage *Storage = Name.getAsAssumedTemplateName();
return DeclarationNameInfo(Storage->getDeclName(), NameLoc);
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
DeclarationName DName;
if (DTN->isIdentifier()) {
DName = DeclarationNames.getIdentifier(DTN->getIdentifier());
return DeclarationNameInfo(DName, NameLoc);
} else {
DName = DeclarationNames.getCXXOperatorName(DTN->getOperator());
// DNInfo work in progress: FIXME: source locations?
DeclarationNameLoc DNLoc =
DeclarationNameLoc::makeCXXOperatorNameLoc(SourceRange());
return DeclarationNameInfo(DName, NameLoc, DNLoc);
}
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return DeclarationNameInfo(subst->getParameter()->getDeclName(),
NameLoc);
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst
= Name.getAsSubstTemplateTemplateParmPack();
return DeclarationNameInfo(subst->getParameterPack()->getDeclName(),
NameLoc);
}
}
llvm_unreachable("bad template name kind!");
}
TemplateName ASTContext::getCanonicalTemplateName(TemplateName Name) const {
switch (Name.getKind()) {
case TemplateName::QualifiedTemplate:
case TemplateName::Template: {
TemplateDecl *Template = Name.getAsTemplateDecl();
if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Template))
Template = getCanonicalTemplateTemplateParmDecl(TTP);
// The canonical template name is the canonical template declaration.
return TemplateName(cast<TemplateDecl>(Template->getCanonicalDecl()));
}
case TemplateName::OverloadedTemplate:
case TemplateName::AssumedTemplate:
llvm_unreachable("cannot canonicalize unresolved template");
case TemplateName::DependentTemplate: {
DependentTemplateName *DTN = Name.getAsDependentTemplateName();
assert(DTN && "Non-dependent template names must refer to template decls.");
return DTN->CanonicalTemplateName;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
return getCanonicalTemplateName(subst->getReplacement());
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *subst
= Name.getAsSubstTemplateTemplateParmPack();
TemplateTemplateParmDecl *canonParameter
= getCanonicalTemplateTemplateParmDecl(subst->getParameterPack());
TemplateArgument canonArgPack
= getCanonicalTemplateArgument(subst->getArgumentPack());
return getSubstTemplateTemplateParmPack(canonParameter, canonArgPack);
}
}
llvm_unreachable("bad template name!");
}
bool ASTContext::hasSameTemplateName(TemplateName X, TemplateName Y) {
X = getCanonicalTemplateName(X);
Y = getCanonicalTemplateName(Y);
return X.getAsVoidPointer() == Y.getAsVoidPointer();
}
TemplateArgument
ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
switch (Arg.getKind()) {
case TemplateArgument::Null:
return Arg;
case TemplateArgument::Expression:
return Arg;
case TemplateArgument::Declaration: {
auto *D = cast<ValueDecl>(Arg.getAsDecl()->getCanonicalDecl());
return TemplateArgument(D, Arg.getParamTypeForDecl());
}
case TemplateArgument::NullPtr:
return TemplateArgument(getCanonicalType(Arg.getNullPtrType()),
/*isNullPtr*/true);
case TemplateArgument::Template:
return TemplateArgument(getCanonicalTemplateName(Arg.getAsTemplate()));
case TemplateArgument::TemplateExpansion:
return TemplateArgument(getCanonicalTemplateName(
Arg.getAsTemplateOrTemplatePattern()),
Arg.getNumTemplateExpansions());
case TemplateArgument::Integral:
return TemplateArgument(Arg, getCanonicalType(Arg.getIntegralType()));
case TemplateArgument::Type:
return TemplateArgument(getCanonicalType(Arg.getAsType()));
case TemplateArgument::Pack: {
if (Arg.pack_size() == 0)
return Arg;
auto *CanonArgs = new (*this) TemplateArgument[Arg.pack_size()];
unsigned Idx = 0;
for (TemplateArgument::pack_iterator A = Arg.pack_begin(),
AEnd = Arg.pack_end();
A != AEnd; (void)++A, ++Idx)
CanonArgs[Idx] = getCanonicalTemplateArgument(*A);
return TemplateArgument(llvm::makeArrayRef(CanonArgs, Arg.pack_size()));
}
}
// Silence GCC warning
llvm_unreachable("Unhandled template argument kind");
}
NestedNameSpecifier *
ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
if (!NNS)
return nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
// Canonicalize the prefix but keep the identifier the same.
return NestedNameSpecifier::Create(*this,
getCanonicalNestedNameSpecifier(NNS->getPrefix()),
NNS->getAsIdentifier());
case NestedNameSpecifier::Namespace:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespace()->getOriginalNamespace());
case NestedNameSpecifier::NamespaceAlias:
// A namespace is canonical; build a nested-name-specifier with
// this namespace and no prefix.
return NestedNameSpecifier::Create(*this, nullptr,
NNS->getAsNamespaceAlias()->getNamespace()
->getOriginalNamespace());
// The difference between TypeSpec and TypeSpecWithTemplate is that the
// latter will have the 'template' keyword when printed.
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
const Type *T = getCanonicalType(NNS->getAsType());
// If we have some kind of dependent-named type (e.g., "typename T::type"),
// break it apart into its prefix and identifier, then reconstitute those
// as the canonical nested-name-specifier. This is required to canonicalize
// a dependent nested-name-specifier involving typedefs of dependent-name
// types, e.g.,
// typedef typename T::type T1;
// typedef typename T1::type T2;
if (const auto *DNT = T->getAs<DependentNameType>())
return NestedNameSpecifier::Create(
*this, DNT->getQualifier(),
const_cast<IdentifierInfo *>(DNT->getIdentifier()));
if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
const_cast<Type *>(T));
// TODO: Set 'Template' parameter to true for other template types.
return NestedNameSpecifier::Create(*this, nullptr, false,
const_cast<Type *>(T));
}
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
// The global specifier and __super specifier are canonical and unique.
return NNS;
}
llvm_unreachable("Invalid NestedNameSpecifier::Kind!");
}
const ArrayType *ASTContext::getAsArrayType(QualType T) const {
// Handle the non-qualified case efficiently.
if (!T.hasLocalQualifiers()) {
// Handle the common positive case fast.
if (const auto *AT = dyn_cast<ArrayType>(T))
return AT;
}
// Handle the common negative case fast.
if (!isa<ArrayType>(T.getCanonicalType()))
return nullptr;
// Apply any qualifiers from the array type to the element type. This
// implements C99 6.7.3p8: "If the specification of an array type includes
// any type qualifiers, the element type is so qualified, not the array type."
// If we get here, we either have type qualifiers on the type, or we have
// sugar such as a typedef in the way. If we have type qualifiers on the type
// we must propagate them down into the element type.
SplitQualType split = T.getSplitDesugaredType();
Qualifiers qs = split.Quals;
// If we have a simple case, just return now.
const auto *ATy = dyn_cast<ArrayType>(split.Ty);
if (!ATy || qs.empty())
return ATy;
// Otherwise, we have an array and we have qualifiers on it. Push the
// qualifiers into the array element type and return a new array type.
QualType NewEltTy = getQualifiedType(ATy->getElementType(), qs);
if (const auto *CAT = dyn_cast<ConstantArrayType>(ATy))
return cast<ArrayType>(getConstantArrayType(NewEltTy, CAT->getSize(),
CAT->getSizeExpr(),
CAT->getSizeModifier(),
CAT->getIndexTypeCVRQualifiers()));
if (const auto *IAT = dyn_cast<IncompleteArrayType>(ATy))
return cast<ArrayType>(getIncompleteArrayType(NewEltTy,
IAT->getSizeModifier(),
IAT->getIndexTypeCVRQualifiers()));
if (const auto *DSAT = dyn_cast<DependentSizedArrayType>(ATy))
return cast<ArrayType>(
getDependentSizedArrayType(NewEltTy,
DSAT->getSizeExpr(),
DSAT->getSizeModifier(),
DSAT->getIndexTypeCVRQualifiers(),
DSAT->getBracketsRange()));
const auto *VAT = cast<VariableArrayType>(ATy);
return cast<ArrayType>(getVariableArrayType(NewEltTy,
VAT->getSizeExpr(),
VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(),
VAT->getBracketsRange()));
}
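// Example (illustrative): given 'typedef int A[5];', the type 'const A'
// carries the 'const' on the typedef sugar; this routine desugars, pushes
// the qualifier into the element type, and returns an array of 'const int',
// implementing C99 6.7.3p8.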
QualType ASTContext::getAdjustedParameterType(QualType T) const {
if (T->isArrayType() || T->isFunctionType())
return getDecayedType(T);
return T;
}
QualType ASTContext::getSignatureParameterType(QualType T) const {
T = getVariableArrayDecayedType(T);
T = getAdjustedParameterType(T);
return T.getUnqualifiedType();
}
QualType ASTContext::getExceptionObjectType(QualType T) const {
// C++ [except.throw]p3:
// A throw-expression initializes a temporary object, called the exception
// object, the type of which is determined by removing any top-level
// cv-qualifiers from the static type of the operand of throw and adjusting
// the type from "array of T" or "function returning T" to "pointer to T"
// or "pointer to function returning T", [...]
T = getVariableArrayDecayedType(T);
if (T->isArrayType() || T->isFunctionType())
T = getDecayedType(T);
return T.getUnqualifiedType();
}
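// Example (illustrative): for 'throw "oops"' the operand has type
// 'const char[5]', so the exception object type computed here is
// 'const char *'; a thrown 'const int' simply becomes 'int'.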
/// getArrayDecayedType - Return the properly qualified result of decaying the
/// specified array type to a pointer. This operation is non-trivial when
/// handling typedefs etc. The canonical type of "T" must be an array type;
/// this returns a pointer to a properly qualified element of the array.
///
/// See C99 6.7.5.3p7 and C99 6.3.2.1p3.
QualType ASTContext::getArrayDecayedType(QualType Ty) const {
// Get the element type with 'getAsArrayType' so that we don't lose any
// typedefs in the element type of the array. This also handles propagation
// of type qualifiers from the array type into the element type if present
// (C99 6.7.3p8).
const ArrayType *PrettyArrayType = getAsArrayType(Ty);
assert(PrettyArrayType && "Not an array type!");
QualType PtrTy = getPointerType(PrettyArrayType->getElementType());
// int x[restrict 4] -> int *restrict
QualType Result = getQualifiedType(PtrTy,
PrettyArrayType->getIndexTypeQualifiers());
// int x[_Nullable] -> int * _Nullable
if (auto Nullability = Ty->getNullability(*this)) {
Result = const_cast<ASTContext *>(this)->getAttributedType(
AttributedType::getNullabilityAttrKind(*Nullability), Result, Result);
}
return Result;
}
QualType ASTContext::getBaseElementType(const ArrayType *array) const {
return getBaseElementType(array->getElementType());
}
QualType ASTContext::getBaseElementType(QualType type) const {
Qualifiers qs;
while (true) {
SplitQualType split = type.getSplitDesugaredType();
const ArrayType *array = split.Ty->getAsArrayTypeUnsafe();
if (!array) break;
type = array->getElementType();
qs.addConsistentQualifiers(split.Quals);
}
return getQualifiedType(type, qs);
}
/// getConstantArrayElementCount - Returns number of constant array elements.
uint64_t
ASTContext::getConstantArrayElementCount(const ConstantArrayType *CA) const {
uint64_t ElementCount = 1;
do {
ElementCount *= CA->getSize().getZExtValue();
CA = dyn_cast_or_null<ConstantArrayType>(
CA->getElementType()->getAsArrayTypeUnsafe());
} while (CA);
return ElementCount;
}
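// Example (illustrative): for the type 'int[3][4]' the loop multiplies the
// bounds of each nested constant array, so this returns 3 * 4 = 12.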
/// getFloatingRank - Return a relative rank for floating point types.
/// This routine will assert if passed a built-in type that isn't a float.
static FloatingRank getFloatingRank(QualType T) {
if (const auto *CT = T->getAs<ComplexType>())
return getFloatingRank(CT->getElementType());
switch (T->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("getFloatingRank(): not a floating type");
case BuiltinType::Float16: return Float16Rank;
case BuiltinType::Half: return HalfRank;
case BuiltinType::Float: return FloatRank;
case BuiltinType::Double: return DoubleRank;
case BuiltinType::LongDouble: return LongDoubleRank;
case BuiltinType::Float128: return Float128Rank;
case BuiltinType::BFloat16: return BFloat16Rank;
}
}
/// getFloatingTypeOfSizeWithinDomain - Returns a real floating
/// point or a complex type (based on Domain/Size).
/// 'Domain' is a real floating point or complex type and determines whether
/// the result is real or complex; 'Size' determines the floating-point rank.
QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
QualType Domain) const {
FloatingRank EltRank = getFloatingRank(Size);
if (Domain->isComplexType()) {
switch (EltRank) {
case BFloat16Rank: llvm_unreachable("Complex bfloat16 is not supported");
case Float16Rank:
case HalfRank: llvm_unreachable("Complex half is not supported");
case FloatRank: return FloatComplexTy;
case DoubleRank: return DoubleComplexTy;
case LongDoubleRank: return LongDoubleComplexTy;
case Float128Rank: return Float128ComplexTy;
}
}
assert(Domain->isRealFloatingType() && "Unknown domain!");
switch (EltRank) {
case Float16Rank: return HalfTy;
case BFloat16Rank: return BFloat16Ty;
case HalfRank: return HalfTy;
case FloatRank: return FloatTy;
case DoubleRank: return DoubleTy;
case LongDoubleRank: return LongDoubleTy;
case Float128Rank: return Float128Ty;
}
llvm_unreachable("getFloatingRank(): illegal value for rank");
}
/// getFloatingTypeOrder - Compare the rank of the two specified floating
/// point types, ignoring the domain of the type (i.e. 'double' ==
/// '_Complex double'). If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getFloatingTypeOrder(QualType LHS, QualType RHS) const {
FloatingRank LHSR = getFloatingRank(LHS);
FloatingRank RHSR = getFloatingRank(RHS);
if (LHSR == RHSR)
return 0;
if (LHSR > RHSR)
return 1;
return -1;
}
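// Example (illustrative): getFloatingTypeOrder(DoubleTy, FloatTy) returns 1,
// and getFloatingTypeOrder(FloatTy, FloatComplexTy) returns 0, since complex
// types are ranked by their element type.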
int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const {
if (&getFloatTypeSemantics(LHS) == &getFloatTypeSemantics(RHS))
return 0;
return getFloatingTypeOrder(LHS, RHS);
}
/// getIntegerRank - Return an integer conversion rank (C99 6.3.1.1p1). This
/// routine will assert if passed a built-in type that isn't an integer or enum,
/// or if it is not canonicalized.
unsigned ASTContext::getIntegerRank(const Type *T) const {
assert(T->isCanonicalUnqualified() && "T should be canonicalized");
// Results in this 'losing' to any type of the same size, but winning if
// larger.
if (const auto *EIT = dyn_cast<ExtIntType>(T))
return 0 + (EIT->getNumBits() << 3);
switch (cast<BuiltinType>(T)->getKind()) {
default: llvm_unreachable("getIntegerRank(): not a built-in integer");
case BuiltinType::Bool:
return 1 + (getIntWidth(BoolTy) << 3);
case BuiltinType::Char_S:
case BuiltinType::Char_U:
case BuiltinType::SChar:
case BuiltinType::UChar:
return 2 + (getIntWidth(CharTy) << 3);
case BuiltinType::Short:
case BuiltinType::UShort:
return 3 + (getIntWidth(ShortTy) << 3);
case BuiltinType::Int:
case BuiltinType::UInt:
return 4 + (getIntWidth(IntTy) << 3);
case BuiltinType::Long:
case BuiltinType::ULong:
return 5 + (getIntWidth(LongTy) << 3);
case BuiltinType::LongLong:
case BuiltinType::ULongLong:
return 6 + (getIntWidth(LongLongTy) << 3);
case BuiltinType::Int128:
case BuiltinType::UInt128:
return 7 + (getIntWidth(Int128Ty) << 3);
}
}
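// Worked example (illustrative, assuming 32-bit 'int'): 'int' gets rank
// 4 + (32 << 3) = 260, while '_ExtInt(32)' gets rank 0 + (32 << 3) = 256,
// so an _ExtInt loses to a standard integer type of the same width but
// beats any narrower one.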
/// Whether this is a promotable bitfield reference according
/// to C99 6.3.1.1p2, bullet 2 (and GCC extensions).
///
/// \returns the type this bit-field will promote to, or NULL if no
/// promotion occurs.
QualType ASTContext::isPromotableBitField(Expr *E) const {
if (E->isTypeDependent() || E->isValueDependent())
return {};
// C++ [conv.prom]p5:
// If the bit-field has an enumerated type, it is treated as any other
// value of that type for promotion purposes.
if (getLangOpts().CPlusPlus && E->getType()->isEnumeralType())
return {};
// FIXME: We should not do this unless E->refersToBitField() is true. This
// matters in C where getSourceBitField() will find bit-fields for various
// cases where the source expression is not a bit-field designator.
FieldDecl *Field = E->getSourceBitField(); // FIXME: conditional bit-fields?
if (!Field)
return {};
QualType FT = Field->getType();
uint64_t BitWidth = Field->getBitWidthValue(*this);
uint64_t IntSize = getTypeSize(IntTy);
// C++ [conv.prom]p5:
// A prvalue for an integral bit-field can be converted to a prvalue of type
// int if int can represent all the values of the bit-field; otherwise, it
// can be converted to unsigned int if unsigned int can represent all the
// values of the bit-field. If the bit-field is larger yet, no integral
// promotion applies to it.
// C11 6.3.1.1/2:
// [For a bit-field of type _Bool, int, signed int, or unsigned int:]
// If an int can represent all values of the original type (as restricted by
// the width, for a bit-field), the value is converted to an int; otherwise,
// it is converted to an unsigned int.
//
// FIXME: C does not permit promotion of a 'long : 3' bitfield to int.
// We perform that promotion here to match GCC and C++.
// FIXME: C does not permit promotion of an enum bit-field whose rank is
// greater than that of 'int'. We perform that promotion to match GCC.
if (BitWidth < IntSize)
return IntTy;
if (BitWidth == IntSize)
return FT->isSignedIntegerType() ? IntTy : UnsignedIntTy;
// Bit-fields wider than int are not subject to promotions, and therefore act
// like the base type. GCC has some weird bugs in this area that we
// deliberately do not follow (GCC follows a pre-standard resolution to
// C's DR315 which treats bit-width as being part of the type, and this leaks
// into their semantics in some cases).
return {};
}
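// Example (illustrative, assuming 32-bit 'int'): given
//   struct S { unsigned long b : 3; unsigned int c : 32; };
// 's.b' promotes to 'int' (its width is smaller than int's), while 's.c'
// promotes to 'unsigned int' (same width as int, unsigned field type).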
/// getPromotedIntegerType - Returns the type that Promotable will
/// promote to: C99 6.3.1.1p2, assuming that Promotable is a promotable
/// integer type.
QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
assert(!Promotable.isNull());
assert(Promotable->isPromotableIntegerType());
if (const auto *ET = Promotable->getAs<EnumType>())
return ET->getDecl()->getPromotionType();
if (const auto *BT = Promotable->getAs<BuiltinType>()) {
// C++ [conv.prom]: A prvalue of type char16_t, char32_t, or wchar_t
// (3.9.1) can be converted to a prvalue of the first of the following
// types that can represent all the values of its underlying type:
// int, unsigned int, long int, unsigned long int, long long int, or
// unsigned long long int [...]
// FIXME: Is there some better way to compute this?
if (BT->getKind() == BuiltinType::WChar_S ||
BT->getKind() == BuiltinType::WChar_U ||
BT->getKind() == BuiltinType::Char8 ||
BT->getKind() == BuiltinType::Char16 ||
BT->getKind() == BuiltinType::Char32) {
bool FromIsSigned = BT->getKind() == BuiltinType::WChar_S;
uint64_t FromSize = getTypeSize(BT);
QualType PromoteTypes[] = { IntTy, UnsignedIntTy, LongTy, UnsignedLongTy,
LongLongTy, UnsignedLongLongTy };
for (size_t Idx = 0; Idx < llvm::array_lengthof(PromoteTypes); ++Idx) {
uint64_t ToSize = getTypeSize(PromoteTypes[Idx]);
if (FromSize < ToSize ||
(FromSize == ToSize &&
FromIsSigned == PromoteTypes[Idx]->isSignedIntegerType()))
return PromoteTypes[Idx];
}
llvm_unreachable("char type should fit into long long");
}
}
// At this point, we should have a signed or unsigned integer type.
if (Promotable->isSignedIntegerType())
return IntTy;
uint64_t PromotableSize = getIntWidth(Promotable);
uint64_t IntSize = getIntWidth(IntTy);
assert(Promotable->isUnsignedIntegerType() && PromotableSize <= IntSize);
return (PromotableSize != IntSize) ? IntTy : UnsignedIntTy;
}
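// Example (illustrative, assuming 32-bit 'int'): 'unsigned short' promotes
// to 'int' (a signed int can hold all its values), while 'char32_t' promotes
// to 'unsigned int', the first listed type of matching size and signedness.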
/// Recurses into pointer/array types until it finds an Objective-C
/// retainable type and returns its ownership.
Qualifiers::ObjCLifetime ASTContext::getInnerObjCOwnership(QualType T) const {
while (!T.isNull()) {
if (T.getObjCLifetime() != Qualifiers::OCL_None)
return T.getObjCLifetime();
if (T->isArrayType())
T = getBaseElementType(T);
else if (const auto *PT = T->getAs<PointerType>())
T = PT->getPointeeType();
else if (const auto *RT = T->getAs<ReferenceType>())
T = RT->getPointeeType();
else
break;
}
return Qualifiers::OCL_None;
}
static const Type *getIntegerTypeForEnum(const EnumType *ET) {
// Incomplete enum types are not treated as integer types.
// FIXME: In C++, enum types are never integer types.
if (ET->getDecl()->isComplete() && !ET->getDecl()->isScoped())
return ET->getDecl()->getIntegerType().getTypePtr();
return nullptr;
}
/// getIntegerTypeOrder - Returns the highest ranked integer type:
/// C99 6.3.1.8p1. If LHS > RHS, return 1. If LHS == RHS, return 0. If
/// LHS < RHS, return -1.
int ASTContext::getIntegerTypeOrder(QualType LHS, QualType RHS) const {
const Type *LHSC = getCanonicalType(LHS).getTypePtr();
const Type *RHSC = getCanonicalType(RHS).getTypePtr();
// Unwrap enums to their underlying type.
if (const auto *ET = dyn_cast<EnumType>(LHSC))
LHSC = getIntegerTypeForEnum(ET);
if (const auto *ET = dyn_cast<EnumType>(RHSC))
RHSC = getIntegerTypeForEnum(ET);
if (LHSC == RHSC) return 0;
bool LHSUnsigned = LHSC->isUnsignedIntegerType();
bool RHSUnsigned = RHSC->isUnsignedIntegerType();
unsigned LHSRank = getIntegerRank(LHSC);
unsigned RHSRank = getIntegerRank(RHSC);
if (LHSUnsigned == RHSUnsigned) { // Both signed or both unsigned.
if (LHSRank == RHSRank) return 0;
return LHSRank > RHSRank ? 1 : -1;
}
// Otherwise, the LHS is signed and the RHS is unsigned or vice versa.
if (LHSUnsigned) {
// If the unsigned [LHS] type is larger, return it.
if (LHSRank >= RHSRank)
return 1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return -1;
}
// If the unsigned [RHS] type is larger, return it.
if (RHSRank >= LHSRank)
return -1;
// If the signed type can represent all values of the unsigned type, it
// wins. Because we are dealing with 2's complement and types that are
// powers of two larger than each other, this is always safe.
return 1;
}
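// Example (illustrative, assuming 32-bit 'int' and 64-bit 'long'):
// getIntegerTypeOrder(IntTy, UnsignedIntTy) returns -1 (the unsigned type
// wins at equal rank), while getIntegerTypeOrder(LongTy, UnsignedIntTy)
// returns 1, since 'long' can represent every 'unsigned int' value.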
TypedefDecl *ASTContext::getCFConstantStringDecl() const {
if (CFConstantStringTypeDecl)
return CFConstantStringTypeDecl;
assert(!CFConstantStringTagDecl &&
"tag and typedef should be initialized together");
CFConstantStringTagDecl = buildImplicitRecord("__NSConstantString_tag");
CFConstantStringTagDecl->startDefinition();
struct {
QualType Type;
const char *Name;
} Fields[5];
unsigned Count = 0;
/// Objective-C ABI
///
/// typedef struct __NSConstantString_tag {
/// const int *isa;
/// int flags;
/// const char *str;
/// long length;
/// } __NSConstantString;
///
/// Swift ABI (4.1, 4.2)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uint32_t _length;
/// } __NSConstantString;
///
/// Swift ABI (5.0)
///
/// typedef struct __NSConstantString_tag {
/// uintptr_t _cfisa;
/// uintptr_t _swift_rc;
/// _Atomic(uint64_t) _cfinfoa;
/// const char *_ptr;
/// uintptr_t _length;
/// } __NSConstantString;
const auto CFRuntime = getLangOpts().CFRuntime;
if (static_cast<unsigned>(CFRuntime) <
static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift)) {
Fields[Count++] = { getPointerType(IntTy.withConst()), "isa" };
Fields[Count++] = { IntTy, "flags" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "str" };
Fields[Count++] = { LongTy, "length" };
} else {
Fields[Count++] = { getUIntPtrType(), "_cfisa" };
Fields[Count++] = { getUIntPtrType(), "_swift_rc" };
Fields[Count++] = { getFromTargetType(Target->getUInt64Type()), "_swift_rc" };
Fields[Count++] = { getPointerType(CharTy.withConst()), "_ptr" };
if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 ||
CFRuntime == LangOptions::CoreFoundationABI::Swift4_2)
Fields[Count++] = { IntTy, "_ptr" };
else
Fields[Count++] = { getUIntPtrType(), "_ptr" };
}
// Create fields
for (unsigned i = 0; i < Count; ++i) {
FieldDecl *Field =
FieldDecl::Create(*this, CFConstantStringTagDecl, SourceLocation(),
SourceLocation(), &Idents.get(Fields[i].Name),
Fields[i].Type, /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
CFConstantStringTagDecl->addDecl(Field);
}
CFConstantStringTagDecl->completeDefinition();
// This type is designed to be compatible with NSConstantString, but cannot
// use the same name, since NSConstantString is an interface.
auto tagType = getTagDeclType(CFConstantStringTagDecl);
CFConstantStringTypeDecl =
buildImplicitTypedef(tagType, "__NSConstantString");
return CFConstantStringTypeDecl;
}
RecordDecl *ASTContext::getCFConstantStringTagDecl() const {
if (!CFConstantStringTagDecl)
getCFConstantStringDecl(); // Build the tag and the typedef.
return CFConstantStringTagDecl;
}
// getCFConstantStringType - Return the type used for constant CFStrings.
QualType ASTContext::getCFConstantStringType() const {
return getTypedefType(getCFConstantStringDecl());
}
QualType ASTContext::getObjCSuperType() const {
if (ObjCSuperType.isNull()) {
RecordDecl *ObjCSuperTypeDecl = buildImplicitRecord("objc_super");
getTranslationUnitDecl()->addDecl(ObjCSuperTypeDecl);
ObjCSuperType = getTagDeclType(ObjCSuperTypeDecl);
}
return ObjCSuperType;
}
void ASTContext::setCFConstantStringType(QualType T) {
const auto *TD = T->castAs<TypedefType>();
CFConstantStringTypeDecl = cast<TypedefDecl>(TD->getDecl());
const auto *TagType =
CFConstantStringTypeDecl->getUnderlyingType()->castAs<RecordType>();
CFConstantStringTagDecl = TagType->getDecl();
}
QualType ASTContext::getBlockDescriptorType() const {
if (BlockDescriptorType)
return getTagDeclType(BlockDescriptorType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
};
static const char *const FieldNames[] = {
"reserved",
"Size"
};
for (size_t i = 0; i < 2; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorType = RD;
return getTagDeclType(BlockDescriptorType);
}
QualType ASTContext::getBlockDescriptorExtendedType() const {
if (BlockDescriptorExtendedType)
return getTagDeclType(BlockDescriptorExtendedType);
RecordDecl *RD;
// FIXME: Needs the FlagAppleBlock bit.
RD = buildImplicitRecord("__block_descriptor_withcopydispose");
RD->startDefinition();
QualType FieldTypes[] = {
UnsignedLongTy,
UnsignedLongTy,
getPointerType(VoidPtrTy),
getPointerType(VoidPtrTy)
};
static const char *const FieldNames[] = {
"reserved",
"Size",
"CopyFuncPtr",
"DestroyFuncPtr"
};
for (size_t i = 0; i < 4; ++i) {
FieldDecl *Field = FieldDecl::Create(
*this, RD, SourceLocation(), SourceLocation(),
&Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
RD->addDecl(Field);
}
RD->completeDefinition();
BlockDescriptorExtendedType = RD;
return getTagDeclType(BlockDescriptorExtendedType);
}
OpenCLTypeKind ASTContext::getOpenCLTypeKind(const Type *T) const {
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT) {
if (isa<PipeType>(T))
return OCLTK_Pipe;
return OCLTK_Default;
}
switch (BT->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
return OCLTK_Image;
#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLClkEvent:
return OCLTK_ClkEvent;
case BuiltinType::OCLEvent:
return OCLTK_Event;
case BuiltinType::OCLQueue:
return OCLTK_Queue;
case BuiltinType::OCLReserveID:
return OCLTK_ReserveID;
case BuiltinType::OCLSampler:
return OCLTK_Sampler;
default:
return OCLTK_Default;
}
}
LangAS ASTContext::getOpenCLTypeAddrSpace(const Type *T) const {
return Target->getOpenCLTypeAddrSpace(getOpenCLTypeKind(T));
}
/// BlockRequiresCopying - Returns true if byref variable "D" of type "Ty"
/// requires copy/dispose. Note that this must match the logic
/// in buildByrefHelpers.
bool ASTContext::BlockRequiresCopying(QualType Ty,
const VarDecl *D) {
if (const CXXRecordDecl *record = Ty->getAsCXXRecordDecl()) {
const Expr *copyExpr = getBlockVarCopyInit(D).getCopyExpr();
if (!copyExpr && record->hasTrivialDestructor()) return false;
return true;
}
// The block needs copy/destroy helpers if Ty is non-trivial to destructively
// move or destroy.
if (Ty.isNonTrivialToPrimitiveDestructiveMove() || Ty.isDestructedType())
return true;
if (!Ty->isObjCRetainableType()) return false;
Qualifiers qs = Ty.getQualifiers();
// If we have lifetime, that dominates.
if (Qualifiers::ObjCLifetime lifetime = qs.getObjCLifetime()) {
switch (lifetime) {
case Qualifiers::OCL_None: llvm_unreachable("impossible");
// These are just bits as far as the runtime is concerned.
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Autoreleasing:
return false;
// These cases should have been taken care of when checking the type's
// non-triviality.
case Qualifiers::OCL_Weak:
case Qualifiers::OCL_Strong:
llvm_unreachable("impossible");
}
llvm_unreachable("fell out of lifetime switch!");
}
return (Ty->isBlockPointerType() || isObjCNSObjectType(Ty) ||
Ty->isObjCObjectPointerType());
}
bool ASTContext::getByrefLifetime(QualType Ty,
Qualifiers::ObjCLifetime &LifeTime,
bool &HasByrefExtendedLayout) const {
if (!getLangOpts().ObjC ||
getLangOpts().getGC() != LangOptions::NonGC)
return false;
HasByrefExtendedLayout = false;
if (Ty->isRecordType()) {
HasByrefExtendedLayout = true;
LifeTime = Qualifiers::OCL_None;
} else if ((LifeTime = Ty.getObjCLifetime())) {
// Honor the ARC qualifiers.
} else if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType()) {
// The MRR rule.
LifeTime = Qualifiers::OCL_ExplicitNone;
} else {
LifeTime = Qualifiers::OCL_None;
}
return true;
}
CanQualType ASTContext::getNSUIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return UnsignedLongLongTy;
return UnsignedLongTy;
}
CanQualType ASTContext::getNSIntegerType() const {
assert(Target && "Expected target to be initialized");
const llvm::Triple &T = Target->getTriple();
// Windows is LLP64 rather than LP64
if (T.isOSWindows() && T.isArch64Bit())
return LongLongTy;
return LongTy;
}
TypedefDecl *ASTContext::getObjCInstanceTypeDecl() {
if (!ObjCInstanceTypeDecl)
ObjCInstanceTypeDecl =
buildImplicitTypedef(getObjCIdType(), "instancetype");
return ObjCInstanceTypeDecl;
}
// This returns true if a type has been typedefed to BOOL:
// typedef <type> BOOL;
static bool isTypeTypedefedAsBOOL(QualType T) {
if (const auto *TT = dyn_cast<TypedefType>(T))
if (IdentifierInfo *II = TT->getDecl()->getIdentifier())
return II->isStr("BOOL");
return false;
}
/// getObjCEncodingTypeSize returns the size of a type for Objective-C
/// encoding purposes.
CharUnits ASTContext::getObjCEncodingTypeSize(QualType type) const {
if (!type->isIncompleteArrayType() && type->isIncompleteType())
return CharUnits::Zero();
CharUnits sz = getTypeSizeInChars(type);
// Make all integer and enum types at least as large as an int
if (sz.isPositive() && type->isIntegralOrEnumerationType())
sz = std::max(sz, getTypeSizeInChars(IntTy));
// Treat arrays as pointers, since that's how they're passed in.
else if (type->isArrayType())
sz = getTypeSizeInChars(VoidPtrTy);
return sz;
}
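// Example (illustrative, assuming 32-bit 'int' and 64-bit pointers): a
// 'char' parameter is counted with the size of 'int' (4 bytes), and an
// 'int[10]' parameter is counted as a pointer (8 bytes).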
bool ASTContext::isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const {
return getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember() &&
VD->getType()->isIntegralOrEnumerationType() &&
!VD->getFirstDecl()->isOutOfLine() && VD->getFirstDecl()->hasInit();
}
ASTContext::InlineVariableDefinitionKind
ASTContext::getInlineVariableDefinitionKind(const VarDecl *VD) const {
if (!VD->isInline())
return InlineVariableDefinitionKind::None;
// In almost all cases, it's a weak definition.
auto *First = VD->getFirstDecl();
if (First->isInlineSpecified() || !First->isStaticDataMember())
return InlineVariableDefinitionKind::Weak;
// If there's a file-context declaration in this translation unit, it's a
// non-discardable definition.
for (auto *D : VD->redecls())
if (D->getLexicalDeclContext()->isFileContext() &&
!D->isInlineSpecified() && (D->isConstexpr() || First->isConstexpr()))
return InlineVariableDefinitionKind::Strong;
// If we've not seen one yet, we don't know.
return InlineVariableDefinitionKind::WeakUnknown;
}
static std::string charUnitsToString(const CharUnits &CU) {
return llvm::itostr(CU.getQuantity());
}
/// getObjCEncodingForBlock - Return the encoded type for this block
/// declaration.
std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const {
std::string S;
const BlockDecl *Decl = Expr->getBlockDecl();
QualType BlockTy =
Expr->getType()->castAs<BlockPointerType>()->getPointeeType();
QualType BlockReturnTy = BlockTy->castAs<FunctionType>()->getReturnType();
// Encode result type.
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, BlockReturnTy, S,
true /*Extended*/);
else
getObjCEncodingForType(BlockReturnTy, S);
// Compute size of all parameters.
// Start with computing size of a pointer in number of bytes.
// FIXME: There might(should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
CharUnits ParmOffset = PtrSize;
for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() && "BlockExpr - Incomplete param type");
ParmOffset += sz;
}
// Size of the argument frame
S += charUnitsToString(ParmOffset);
// Block pointer and offset.
S += "@?0";
// Argument types.
ParmOffset = PtrSize;
for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
if (getLangOpts().EncodeExtendedBlockSig)
getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, PType,
S, true /*Extended*/);
else
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
std::string
ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl) const {
std::string S;
// Encode result type.
getObjCEncodingForType(Decl->getReturnType(), S);
CharUnits ParmOffset;
// Compute size of all parameters.
for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForFunctionDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
ParmOffset = CharUnits::Zero();
// Argument types.
for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForType(PType, S);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
/// getObjCEncodingForMethodParameter - Return the encoded type for a single
/// method parameter or return type. If Extended, include class names and
/// block object types.
void ASTContext::getObjCEncodingForMethodParameter(Decl::ObjCDeclQualifier QT,
QualType T, std::string& S,
bool Extended) const {
// Encode type qualifier, 'in', 'inout', etc. for the parameter.
getObjCEncodingForTypeQualifier(QT, S);
// Encode parameter type.
ObjCEncOptions Options = ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType();
if (Extended)
Options.setEncodeBlockParameters().setEncodeClassNames();
getObjCEncodingForTypeImpl(T, S, Options, /*Field=*/nullptr);
}
/// getObjCEncodingForMethodDecl - Return the encoded type for this method
/// declaration.
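/// For illustration only (offsets vary with the target's pointer width): on a
/// typical 64-bit target, a method declared as
/// @code
///   - (void)setIntValue:(int)value;
/// @endcode
/// is normally encoded as "v20@0:8i16": the return type, the total argument
/// frame size, the implicit self ('@') and _cmd (':') arguments with their
/// offsets, then each declared parameter with its byte offset.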
std::string ASTContext::getObjCEncodingForMethodDecl(const ObjCMethodDecl *Decl,
bool Extended) const {
// FIXME: This is not very efficient.
// Encode return type.
std::string S;
getObjCEncodingForMethodParameter(Decl->getObjCDeclQualifier(),
Decl->getReturnType(), S, Extended);
// Compute size of all parameters.
// Start by computing the size of a pointer in bytes.
// FIXME: There might (and should) be a better way of doing this computation!
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
// The first two arguments (self and _cmd) are pointers; account for
// their size.
CharUnits ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
QualType PType = (*PI)->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
continue;
assert(sz.isPositive() &&
"getObjCEncodingForMethodDecl - Incomplete param type");
ParmOffset += sz;
}
S += charUnitsToString(ParmOffset);
S += "@0:";
S += charUnitsToString(PtrSize);
// Argument types.
ParmOffset = 2 * PtrSize;
for (ObjCMethodDecl::param_const_iterator PI = Decl->param_begin(),
E = Decl->sel_param_end(); PI != E; ++PI) {
const ParmVarDecl *PVDecl = *PI;
QualType PType = PVDecl->getOriginalType();
if (const auto *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
// Use the array's original type only if it has a known number of
// elements.
if (!isa<ConstantArrayType>(AT))
PType = PVDecl->getType();
} else if (PType->isFunctionType())
PType = PVDecl->getType();
getObjCEncodingForMethodParameter(PVDecl->getObjCDeclQualifier(),
PType, S, Extended);
S += charUnitsToString(ParmOffset);
ParmOffset += getObjCEncodingTypeSize(PType);
}
return S;
}
ObjCPropertyImplDecl *
ASTContext::getObjCPropertyImplDeclForPropertyDecl(
const ObjCPropertyDecl *PD,
const Decl *Container) const {
if (!Container)
return nullptr;
if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(Container)) {
for (auto *PID : CID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
} else {
const auto *OID = cast<ObjCImplementationDecl>(Container);
for (auto *PID : OID->property_impls())
if (PID->getPropertyDecl() == PD)
return PID;
}
return nullptr;
}
/// getObjCEncodingForPropertyDecl - Return the encoded type for this
/// property declaration. If non-NULL, Container must be either an
/// ObjCCategoryImplDecl or ObjCImplementationDecl; it should only be
/// NULL when getting encodings for protocol properties.
/// Property attributes are stored as a comma-delimited C string. The simple
/// attributes readonly and bycopy are encoded as single characters. The
/// parametrized attributes, getter=name, setter=name, and ivar=name, are
/// encoded as single characters, followed by an identifier. Property types
/// are also encoded as a parametrized attribute. The characters used to encode
/// these attributes are defined by the following enumeration:
/// @code
/// enum PropertyAttributes {
/// kPropertyReadOnly = 'R', // property is read-only.
/// kPropertyBycopy = 'C', // property is a copy of the value last assigned
/// kPropertyByref = '&', // property is a reference to the value last assigned
/// kPropertyDynamic = 'D', // property is dynamic
/// kPropertyGetter = 'G', // followed by getter selector name
/// kPropertySetter = 'S', // followed by setter selector name
/// kPropertyInstanceVariable = 'V', // followed by instance variable name
/// kPropertyType = 'T', // followed by old-style type encoding.
/// kPropertyWeak = 'W', // 'weak' property
/// kPropertyStrong = 'P', // property GC'able
/// kPropertyNonAtomic = 'N' // property non-atomic
/// };
/// @endcode
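/// For illustration only: a property declared as
/// @code
///   @property (nonatomic, copy) NSString *name; // synthesized ivar "_name"
/// @endcode
/// is typically encoded as T@"NSString",C,N,V_name.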
std::string
ASTContext::getObjCEncodingForPropertyDecl(const ObjCPropertyDecl *PD,
const Decl *Container) const {
// Collect information from the property implementation decl(s).
bool Dynamic = false;
ObjCPropertyImplDecl *SynthesizePID = nullptr;
if (ObjCPropertyImplDecl *PropertyImpDecl =
getObjCPropertyImplDeclForPropertyDecl(PD, Container)) {
if (PropertyImpDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
Dynamic = true;
else
SynthesizePID = PropertyImpDecl;
}
// FIXME: This is not very efficient.
std::string S = "T";
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resemble the encoding of ivars.
getObjCEncodingForPropertyType(PD->getType(), S);
if (PD->isReadOnly()) {
S += ",R";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_copy)
S += ",C";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_retain)
S += ",&";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_weak)
S += ",W";
} else {
switch (PD->getSetterKind()) {
case ObjCPropertyDecl::Assign: break;
case ObjCPropertyDecl::Copy: S += ",C"; break;
case ObjCPropertyDecl::Retain: S += ",&"; break;
case ObjCPropertyDecl::Weak: S += ",W"; break;
}
}
// It really isn't clear at all what this means, since properties
// are "dynamic by default".
if (Dynamic)
S += ",D";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_nonatomic)
S += ",N";
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_getter) {
S += ",G";
S += PD->getGetterName().getAsString();
}
if (PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_setter) {
S += ",S";
S += PD->getSetterName().getAsString();
}
if (SynthesizePID) {
const ObjCIvarDecl *OID = SynthesizePID->getPropertyIvarDecl();
S += ",V";
S += OID->getNameAsString();
}
// FIXME: OBJCGC: weak & strong
return S;
}
/// getLegacyIntegralTypeEncoding -
/// Another legacy compatibility encoding: 32-bit longs are encoded as
/// 'l' or 'L', but not always. For typedefs, we need to use
/// 'i' or 'I' instead when encoding a struct field or a pointer.
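/// For illustration only: on a target where 'long' is 32 bits wide, a field
/// such as
/// @code
///   typedef long my_long_t; // hypothetical typedef, for illustration
///   struct S { my_long_t field; };
/// @endcode
/// has 'field' encoded as 'i' rather than 'l' (and an 'unsigned long' typedef
/// would be encoded as 'I' rather than 'L').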
void ASTContext::getLegacyIntegralTypeEncoding (QualType &PointeeTy) const {
if (isa<TypedefType>(PointeeTy.getTypePtr())) {
if (const auto *BT = PointeeTy->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::ULong && getIntWidth(PointeeTy) == 32)
PointeeTy = UnsignedIntTy;
else
if (BT->getKind() == BuiltinType::Long && getIntWidth(PointeeTy) == 32)
PointeeTy = IntTy;
}
}
}
void ASTContext::getObjCEncodingForType(QualType T, std::string& S,
const FieldDecl *Field,
QualType *NotEncodedT) const {
// We follow the behavior of gcc, expanding structures which are
// directly pointed to, and expanding embedded structures. Note that
// these rules are sufficient to prevent recursive encoding of the
// same type.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType(),
Field, NotEncodedT);
}
void ASTContext::getObjCEncodingForPropertyType(QualType T,
std::string& S) const {
// Encode result type.
// GCC has some special rules regarding encoding of properties which
// closely resemble the encoding of ivars.
getObjCEncodingForTypeImpl(T, S,
ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()
.setIsOutermostType()
.setEncodingProperty(),
/*Field=*/nullptr);
}
static char getObjCEncodingForPrimitiveType(const ASTContext *C,
const BuiltinType *BT) {
BuiltinType::Kind kind = BT->getKind();
switch (kind) {
case BuiltinType::Void: return 'v';
case BuiltinType::Bool: return 'B';
case BuiltinType::Char8:
case BuiltinType::Char_U:
case BuiltinType::UChar: return 'C';
case BuiltinType::Char16:
case BuiltinType::UShort: return 'S';
case BuiltinType::Char32:
case BuiltinType::UInt: return 'I';
case BuiltinType::ULong:
return C->getTargetInfo().getLongWidth() == 32 ? 'L' : 'Q';
case BuiltinType::UInt128: return 'T';
case BuiltinType::ULongLong: return 'Q';
case BuiltinType::Char_S:
case BuiltinType::SChar: return 'c';
case BuiltinType::Short: return 's';
case BuiltinType::WChar_S:
case BuiltinType::WChar_U:
case BuiltinType::Int: return 'i';
case BuiltinType::Long:
return C->getTargetInfo().getLongWidth() == 32 ? 'l' : 'q';
case BuiltinType::LongLong: return 'q';
case BuiltinType::Int128: return 't';
case BuiltinType::Float: return 'f';
case BuiltinType::Double: return 'd';
case BuiltinType::LongDouble: return 'D';
case BuiltinType::NullPtr: return '*'; // like char*
case BuiltinType::BFloat16:
case BuiltinType::Float16:
case BuiltinType::Float128:
case BuiltinType::Half:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
case BuiltinType::UShortAccum:
case BuiltinType::UAccum:
case BuiltinType::ULongAccum:
case BuiltinType::ShortFract:
case BuiltinType::Fract:
case BuiltinType::LongFract:
case BuiltinType::UShortFract:
case BuiltinType::UFract:
case BuiltinType::ULongFract:
case BuiltinType::SatShortAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatUShortAccum:
case BuiltinType::SatUAccum:
case BuiltinType::SatULongAccum:
case BuiltinType::SatShortFract:
case BuiltinType::SatFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatUShortFract:
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
// FIXME: potentially need @encodes for these!
return ' ';
#define SVE_TYPE(Name, Id, SingletonId) \
case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
{
DiagnosticsEngine &Diags = C->getDiagnostics();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"cannot yet @encode type %0");
Diags.Report(DiagID) << BT->getName(C->getPrintingPolicy());
return ' ';
}
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
llvm_unreachable("@encoding ObjC primitive type");
// OpenCL and placeholder types don't need @encodings.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLReserveID:
case BuiltinType::OCLSampler:
case BuiltinType::Dependent:
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id:
#include "clang/Basic/PPCTypes.def"
#define BUILTIN_TYPE(KIND, ID)
#define PLACEHOLDER_TYPE(KIND, ID) \
case BuiltinType::KIND:
#include "clang/AST/BuiltinTypes.def"
llvm_unreachable("invalid builtin type for @encode");
}
llvm_unreachable("invalid BuiltinType::Kind value");
}
static char ObjCEncodingForEnumType(const ASTContext *C, const EnumType *ET) {
EnumDecl *Enum = ET->getDecl();
// The encoding of a non-fixed enum type is always 'i', regardless of size.
if (!Enum->isFixed())
return 'i';
// The encoding of a fixed enum type matches its fixed underlying type.
const auto *BT = Enum->getIntegerType()->castAs<BuiltinType>();
return getObjCEncodingForPrimitiveType(C, BT);
}
static void EncodeBitField(const ASTContext *Ctx, std::string& S,
QualType T, const FieldDecl *FD) {
assert(FD->isBitField() && "not a bitfield - getObjCEncodingForTypeImpl");
S += 'b';
// The NeXT runtime encodes bit fields as b followed by the number of bits.
// The GNU runtime requires more information; bitfields are encoded as b,
// then the offset (in bits) of the first element, then the type of the
// bitfield, then the size in bits. For example, in this structure:
//
// struct
// {
// int integer;
// int flags:2;
// };
// On a 32-bit system, the encoding for flags would be b2 for the NeXT
// runtime, but b32i2 for the GNU runtime. The reason for this extra
// information is not especially sensible, but we're stuck with it for
// compatibility with GCC, although providing it breaks anything that
// actually uses runtime introspection and wants to work on both runtimes...
if (Ctx->getLangOpts().ObjCRuntime.isGNUFamily()) {
uint64_t Offset;
if (const auto *IVD = dyn_cast<ObjCIvarDecl>(FD)) {
Offset = Ctx->lookupFieldBitOffset(IVD->getContainingInterface(), nullptr,
IVD);
} else {
const RecordDecl *RD = FD->getParent();
const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD);
Offset = RL.getFieldOffset(FD->getFieldIndex());
}
S += llvm::utostr(Offset);
if (const auto *ET = T->getAs<EnumType>())
S += ObjCEncodingForEnumType(Ctx, ET);
else {
const auto *BT = T->castAs<BuiltinType>();
S += getObjCEncodingForPrimitiveType(Ctx, BT);
}
}
S += llvm::utostr(FD->getBitWidthValue(*Ctx));
}
// Helper function for determining whether the encoded type string would include
// a template specialization type.
static bool hasTemplateSpecializationInEncodedString(const Type *T,
bool VisitBasesAndFields) {
T = T->getBaseElementTypeUnsafe();
if (auto *PT = T->getAs<PointerType>())
return hasTemplateSpecializationInEncodedString(
PT->getPointeeType().getTypePtr(), false);
auto *CXXRD = T->getAsCXXRecordDecl();
if (!CXXRD)
return false;
if (isa<ClassTemplateSpecializationDecl>(CXXRD))
return true;
if (!CXXRD->hasDefinition() || !VisitBasesAndFields)
return false;
for (auto B : CXXRD->bases())
if (hasTemplateSpecializationInEncodedString(B.getType().getTypePtr(),
true))
return true;
for (auto *FD : CXXRD->fields())
if (hasTemplateSpecializationInEncodedString(FD->getType().getTypePtr(),
true))
return true;
return false;
}
// FIXME: Use SmallString for accumulating string.
void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
const ObjCEncOptions Options,
const FieldDecl *FD,
QualType *NotEncodedT) const {
CanQualType CT = getCanonicalType(T);
switch (CT->getTypeClass()) {
case Type::Builtin:
case Type::Enum:
if (FD && FD->isBitField())
return EncodeBitField(this, S, T, FD);
if (const auto *BT = dyn_cast<BuiltinType>(CT))
S += getObjCEncodingForPrimitiveType(this, BT);
else
S += ObjCEncodingForEnumType(this, cast<EnumType>(CT));
return;
case Type::Complex:
S += 'j';
getObjCEncodingForTypeImpl(T->castAs<ComplexType>()->getElementType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
case Type::Atomic:
S += 'A';
getObjCEncodingForTypeImpl(T->castAs<AtomicType>()->getValueType(), S,
ObjCEncOptions(),
/*Field=*/nullptr);
return;
// encoding for pointer or reference types.
case Type::Pointer:
case Type::LValueReference:
case Type::RValueReference: {
QualType PointeeTy;
if (isa<PointerType>(CT)) {
const auto *PT = T->castAs<PointerType>();
if (PT->isObjCSelType()) {
S += ':';
return;
}
PointeeTy = PT->getPointeeType();
} else {
PointeeTy = T->castAs<ReferenceType>()->getPointeeType();
}
bool isReadOnly = false;
// For historical/compatibility reasons, the read-only qualifier of the
// pointee gets emitted _before_ the '^'. The read-only qualifier of
// the pointer itself gets ignored, _unless_ we are looking at a typedef!
// Also, do not emit the 'r' for anything but the outermost type!
if (isa<TypedefType>(T.getTypePtr())) {
if (Options.IsOutermostType() && T.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
} else if (Options.IsOutermostType()) {
QualType P = PointeeTy;
while (auto PT = P->getAs<PointerType>())
P = PT->getPointeeType();
if (P.isConstQualified()) {
isReadOnly = true;
S += 'r';
}
}
if (isReadOnly) {
// Another legacy compatibility encoding. Some ObjC qualifier and type
// combinations need to be rearranged.
// Rewrite "in const" from "nr" to "rn"
if (StringRef(S).endswith("nr"))
S.replace(S.end()-2, S.end(), "rn");
}
if (PointeeTy->isCharType()) {
// char pointer types should be encoded as '*' unless the pointee is a
// type that has been typedef'd to 'BOOL'.
if (!isTypeTypedefedAsBOOL(PointeeTy)) {
S += '*';
return;
}
} else if (const auto *RTy = PointeeTy->getAs<RecordType>()) {
// GCC binary compat: Need to convert "struct objc_class *" to "#".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_class")) {
S += '#';
return;
}
// GCC binary compat: Need to convert "struct objc_object *" to "@".
if (RTy->getDecl()->getIdentifier() == &Idents.get("objc_object")) {
S += '@';
return;
}
// If the encoded string for the class includes template names, just emit
// "^v" for pointers to the class.
if (getLangOpts().CPlusPlus &&
(!getLangOpts().EncodeCXXClassTemplateSpec &&
hasTemplateSpecializationInEncodedString(
RTy, Options.ExpandPointedToStructures()))) {
S += "^v";
return;
}
// fall through...
}
S += '^';
getLegacyIntegralTypeEncoding(PointeeTy);
ObjCEncOptions NewOptions;
if (Options.ExpandPointedToStructures())
NewOptions.setExpandStructures();
getObjCEncodingForTypeImpl(PointeeTy, S, NewOptions,
/*Field=*/nullptr, NotEncodedT);
return;
}
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray: {
const auto *AT = cast<ArrayType>(CT);
if (isa<IncompleteArrayType>(AT) && !Options.IsStructField()) {
// Incomplete arrays are encoded as a pointer to the array element.
S += '^';
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD);
} else {
S += '[';
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
S += llvm::utostr(CAT->getSize().getZExtValue());
else {
// Variable length arrays are encoded as a regular array with 0 elements.
assert((isa<VariableArrayType>(AT) || isa<IncompleteArrayType>(AT)) &&
"Unknown array type!");
S += '0';
}
getObjCEncodingForTypeImpl(
AT->getElementType(), S,
Options.keepingOnly(ObjCEncOptions().setExpandStructures()), FD,
NotEncodedT);
S += ']';
}
return;
}
case Type::FunctionNoProto:
case Type::FunctionProto:
S += '?';
return;
case Type::Record: {
RecordDecl *RDecl = cast<RecordType>(CT)->getDecl();
S += RDecl->isUnion() ? '(' : '{';
// Anonymous structures print as '?'
if (const IdentifierInfo *II = RDecl->getIdentifier()) {
S += II->getName();
if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(RDecl)) {
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
llvm::raw_string_ostream OS(S);
printTemplateArgumentList(OS, TemplateArgs.asArray(),
getPrintingPolicy());
}
} else {
S += '?';
}
if (Options.ExpandStructures()) {
S += '=';
if (!RDecl->isUnion()) {
getObjCEncodingForStructureImpl(RDecl, S, FD, true, NotEncodedT);
} else {
for (const auto *Field : RDecl->fields()) {
if (FD) {
S += '"';
S += Field->getNameAsString();
S += '"';
}
// Special case bit-fields.
if (Field->isBitField()) {
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
} else {
QualType qt = Field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S,
ObjCEncOptions().setExpandStructures().setIsStructField(), FD,
NotEncodedT);
}
}
}
}
S += RDecl->isUnion() ? ')' : '}';
return;
}
case Type::BlockPointer: {
const auto *BT = T->castAs<BlockPointerType>();
S += "@?"; // Unlike a pointer-to-function, which is "^?".
if (Options.EncodeBlockParameters()) {
const auto *FT = BT->getPointeeType()->castAs<FunctionType>();
S += '<';
// Block return type
getObjCEncodingForTypeImpl(FT->getReturnType(), S,
Options.forComponentType(), FD, NotEncodedT);
// Block self
S += "@?";
// Block parameters
if (const auto *FPT = dyn_cast<FunctionProtoType>(FT)) {
for (const auto &I : FPT->param_types())
getObjCEncodingForTypeImpl(I, S, Options.forComponentType(), FD,
NotEncodedT);
}
S += '>';
}
return;
}
case Type::ObjCObject: {
// hack to match legacy encoding of *id and *Class
QualType Ty = getObjCObjectPointerType(CT);
if (Ty->isObjCIdType()) {
S += "{objc_object=}";
return;
}
else if (Ty->isObjCClassType()) {
S += "{objc_class=}";
return;
}
// TODO: Double check to make sure this intentionally falls through.
LLVM_FALLTHROUGH;
}
case Type::ObjCInterface: {
// Ignore protocol qualifiers when mangling at this level.
// @encode(class_name)
ObjCInterfaceDecl *OI = T->castAs<ObjCObjectType>()->getInterface();
S += '{';
S += OI->getObjCRuntimeNameAsString();
if (Options.ExpandStructures()) {
S += '=';
SmallVector<const ObjCIvarDecl*, 32> Ivars;
DeepCollectObjCIvars(OI, true, Ivars);
for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
const FieldDecl *Field = Ivars[i];
if (Field->isBitField())
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(),
Field);
else
getObjCEncodingForTypeImpl(Field->getType(), S,
ObjCEncOptions().setExpandStructures(), FD,
NotEncodedT);
}
}
S += '}';
return;
}
case Type::ObjCObjectPointer: {
const auto *OPT = T->castAs<ObjCObjectPointerType>();
if (OPT->isObjCIdType()) {
S += '@';
return;
}
if (OPT->isObjCClassType() || OPT->isObjCQualifiedClassType()) {
// FIXME: Consider if we need to output qualifiers for 'Class<p>'.
// Since this is a binary compatibility issue, need to consult with
// runtime folks. Fortunately, this is a *very* obscure construct.
S += '#';
return;
}
if (OPT->isObjCQualifiedIdType()) {
getObjCEncodingForTypeImpl(
getObjCIdType(), S,
Options.keepingOnly(ObjCEncOptions()
.setExpandPointedToStructures()
.setExpandStructures()),
FD);
if (FD || Options.EncodingProperty() || Options.EncodeClassNames()) {
// Note that we do extended encoding of the protocol qualifier list
// only when doing ivar or property encoding.
S += '"';
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
S += '@';
if (OPT->getInterfaceDecl() &&
(FD || Options.EncodingProperty() || Options.EncodeClassNames())) {
S += '"';
S += OPT->getInterfaceDecl()->getObjCRuntimeNameAsString();
for (const auto *I : OPT->quals()) {
S += '<';
S += I->getObjCRuntimeNameAsString();
S += '>';
}
S += '"';
}
return;
}
// gcc just blithely ignores member pointers.
// FIXME: we should do better than that. 'M' is available.
case Type::MemberPointer:
// This matches gcc's encoding, even though technically it is insufficient.
// FIXME: We should do a better job than gcc.
case Type::Vector:
case Type::ExtVector:
// Until we have a coherent encoding of these three types, issue warning.
if (NotEncodedT)
*NotEncodedT = T;
return;
case Type::ConstantMatrix:
if (NotEncodedT)
*NotEncodedT = T;
return;
// We could see an undeduced auto type here during error recovery.
// Just ignore it.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
return;
case Type::Pipe:
case Type::ExtInt:
#define ABSTRACT_TYPE(KIND, BASE)
#define TYPE(KIND, BASE)
#define DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_TYPE(KIND, BASE) \
case Type::KIND:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(KIND, BASE) \
case Type::KIND:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("@encode for dependent type!");
}
llvm_unreachable("bad type kind!");
}
void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl,
std::string &S,
const FieldDecl *FD,
bool includeVBases,
QualType *NotEncodedT) const {
assert(RDecl && "Expected non-null RecordDecl");
assert(!RDecl->isUnion() && "Should not be called for unions");
if (!RDecl->getDefinition() || RDecl->getDefinition()->isInvalidDecl())
return;
const auto *CXXRec = dyn_cast<CXXRecordDecl>(RDecl);
std::multimap<uint64_t, NamedDecl *> FieldOrBaseOffsets;
const ASTRecordLayout &layout = getASTRecordLayout(RDecl);
if (CXXRec) {
for (const auto &BI : CXXRec->bases()) {
if (!BI.isVirtual()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getBaseClassOffset(base));
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, base));
}
}
}
unsigned i = 0;
for (FieldDecl *Field : RDecl->fields()) {
if (!Field->isZeroLengthBitField(*this) && Field->isZeroSize(*this))
continue;
uint64_t offs = layout.getFieldOffset(i);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, Field));
++i;
}
if (CXXRec && includeVBases) {
for (const auto &BI : CXXRec->vbases()) {
CXXRecordDecl *base = BI.getType()->getAsCXXRecordDecl();
if (base->isEmpty())
continue;
uint64_t offs = toBits(layout.getVBaseClassOffset(base));
if (offs >= uint64_t(toBits(layout.getNonVirtualSize())) &&
FieldOrBaseOffsets.find(offs) == FieldOrBaseOffsets.end())
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.end(),
std::make_pair(offs, base));
}
}
CharUnits size;
if (CXXRec) {
size = includeVBases ? layout.getSize() : layout.getNonVirtualSize();
} else {
size = layout.getSize();
}
#ifndef NDEBUG
uint64_t CurOffs = 0;
#endif
std::multimap<uint64_t, NamedDecl *>::iterator
CurLayObj = FieldOrBaseOffsets.begin();
if (CXXRec && CXXRec->isDynamicClass() &&
(CurLayObj == FieldOrBaseOffsets.end() || CurLayObj->first != 0)) {
if (FD) {
S += "\"_vptr$";
std::string recname = CXXRec->getNameAsString();
if (recname.empty()) recname = "?";
S += recname;
S += '"';
}
S += "^^?";
#ifndef NDEBUG
CurOffs += getTypeSize(VoidPtrTy);
#endif
}
if (!RDecl->hasFlexibleArrayMember()) {
// Mark the end of the structure.
uint64_t offs = toBits(size);
FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs),
std::make_pair(offs, nullptr));
}
for (; CurLayObj != FieldOrBaseOffsets.end(); ++CurLayObj) {
#ifndef NDEBUG
assert(CurOffs <= CurLayObj->first);
if (CurOffs < CurLayObj->first) {
uint64_t padding = CurLayObj->first - CurOffs;
// FIXME: There doesn't seem to be a way to indicate in the encoding that
// packing/alignment of members is different than normal, in which case
// the encoding will be out-of-sync with the real layout.
// If the runtime switches to just consider the size of types without
// taking into account alignment, we could make padding explicit in the
// encoding (e.g. using arrays of chars). The encoding strings would be
// longer in that case, though.
CurOffs += padding;
}
#endif
NamedDecl *dcl = CurLayObj->second;
if (!dcl)
break; // reached end of structure.
if (auto *base = dyn_cast<CXXRecordDecl>(dcl)) {
// We expand the bases without their virtual bases since those are going
// in the initial structure. Note that this differs from gcc which
// expands virtual bases each time one is encountered in the hierarchy,
// making the encoding type bigger than it really is.
getObjCEncodingForStructureImpl(base, S, FD, /*includeVBases*/false,
NotEncodedT);
assert(!base->isEmpty());
#ifndef NDEBUG
CurOffs += toBits(getASTRecordLayout(base).getNonVirtualSize());
#endif
} else {
const auto *field = cast<FieldDecl>(dcl);
if (FD) {
S += '"';
S += field->getNameAsString();
S += '"';
}
if (field->isBitField()) {
EncodeBitField(this, S, field->getType(), field);
#ifndef NDEBUG
CurOffs += field->getBitWidthValue(*this);
#endif
} else {
QualType qt = field->getType();
getLegacyIntegralTypeEncoding(qt);
getObjCEncodingForTypeImpl(
qt, S, ObjCEncOptions().setExpandStructures().setIsStructField(),
FD, NotEncodedT);
#ifndef NDEBUG
CurOffs += getTypeSize(field->getType());
#endif
}
}
}
}
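/// Append the single-character encodings for the given ObjC declaration
/// qualifiers to \p S: 'in' -> 'n', 'inout' -> 'N', 'out' -> 'o',
/// 'bycopy' -> 'O', 'byref' -> 'R', 'oneway' -> 'V'.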
void ASTContext::getObjCEncodingForTypeQualifier(Decl::ObjCDeclQualifier QT,
std::string& S) const {
if (QT & Decl::OBJC_TQ_In)
S += 'n';
if (QT & Decl::OBJC_TQ_Inout)
S += 'N';
if (QT & Decl::OBJC_TQ_Out)
S += 'o';
if (QT & Decl::OBJC_TQ_Bycopy)
S += 'O';
if (QT & Decl::OBJC_TQ_Byref)
S += 'R';
if (QT & Decl::OBJC_TQ_Oneway)
S += 'V';
}
TypedefDecl *ASTContext::getObjCIdDecl() const {
if (!ObjCIdDecl) {
QualType T = getObjCObjectType(ObjCBuiltinIdTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCIdDecl = buildImplicitTypedef(T, "id");
}
return ObjCIdDecl;
}
TypedefDecl *ASTContext::getObjCSelDecl() const {
if (!ObjCSelDecl) {
QualType T = getPointerType(ObjCBuiltinSelTy);
ObjCSelDecl = buildImplicitTypedef(T, "SEL");
}
return ObjCSelDecl;
}
TypedefDecl *ASTContext::getObjCClassDecl() const {
if (!ObjCClassDecl) {
QualType T = getObjCObjectType(ObjCBuiltinClassTy, {}, {});
T = getObjCObjectPointerType(T);
ObjCClassDecl = buildImplicitTypedef(T, "Class");
}
return ObjCClassDecl;
}
ObjCInterfaceDecl *ASTContext::getObjCProtocolDecl() const {
if (!ObjCProtocolClassDecl) {
ObjCProtocolClassDecl
= ObjCInterfaceDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
&Idents.get("Protocol"),
/*typeParamList=*/nullptr,
/*PrevDecl=*/nullptr,
SourceLocation(), true);
}
return ObjCProtocolClassDecl;
}
//===----------------------------------------------------------------------===//
// __builtin_va_list Construction Functions
//===----------------------------------------------------------------------===//
static TypedefDecl *CreateCharPtrNamedVaListDecl(const ASTContext *Context,
StringRef Name) {
// typedef char* __builtin[_ms]_va_list;
QualType T = Context->getPointerType(Context->CharTy);
return Context->buildImplicitTypedef(T, Name);
}
static TypedefDecl *CreateMSVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_ms_va_list");
}
static TypedefDecl *CreateCharPtrBuiltinVaListDecl(const ASTContext *Context) {
return CreateCharPtrNamedVaListDecl(Context, "__builtin_va_list");
}
static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) {
// typedef void* __builtin_va_list;
QualType T = Context->getPointerType(Context->VoidTy);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) {
RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list");
// namespace std { struct __va_list {
// Note that we create the namespace even in C. This is intentional so that
// the type is consistent between C and C++, which is important in cases where
// the types need to match between translation units (e.g. with
// -fsanitize=cfi-icall). Ideally we wouldn't have created this namespace at
// all, but it's now part of the ABI (e.g. in mangled names), so we can't
// change it.
auto *NS = NamespaceDecl::Create(
const_cast<ASTContext &>(*Context), Context->getTranslationUnitDecl(),
/*Inline*/ false, SourceLocation(), SourceLocation(),
&Context->Idents.get("std"),
/*PrevDecl*/ nullptr);
NS->setImplicit();
VaListTagDecl->setDeclContext(NS);
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *__stack;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__stack";
// void *__gr_top;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__gr_top";
// void *__vr_top;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__vr_top";
// int __gr_offs;
FieldTypes[3] = Context->IntTy;
FieldNames[3] = "__gr_offs";
// int __vr_offs;
FieldTypes[4] = Context->IntTy;
FieldNames[4] = "__vr_offs";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __builtin_va_list;
return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list");
}
static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 5;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned char gpr;
FieldTypes[0] = Context->UnsignedCharTy;
FieldNames[0] = "gpr";
// unsigned char fpr;
FieldTypes[1] = Context->UnsignedCharTy;
FieldNames[1] = "fpr";
// unsigned short reserved;
FieldTypes[2] = Context->UnsignedShortTy;
FieldNames[2] = "reserved";
// void* overflow_arg_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[4] = Context->getPointerType(Context->VoidTy);
FieldNames[4] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(*Context, VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType =
Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType
= Context->getConstantArrayType(VaListTagTypedefType,
Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateX86_64ABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// unsigned gp_offset;
FieldTypes[0] = Context->UnsignedIntTy;
FieldNames[0] = "gp_offset";
// unsigned fp_offset;
FieldTypes[1] = Context->UnsignedIntTy;
FieldNames[1] = "fp_offset";
// void* overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "overflow_arg_area";
// void* reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef struct __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreatePNaClABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef int __builtin_va_list[4];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 4);
QualType IntArrayType = Context->getConstantArrayType(
Context->IntTy, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(IntArrayType, "__builtin_va_list");
}
static TypedefDecl *
CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list
RecordDecl *VaListDecl = Context->buildImplicitRecord("__va_list");
if (Context->getLangOpts().CPlusPlus) {
// namespace std { struct __va_list {
NamespaceDecl *NS;
NS = NamespaceDecl::Create(const_cast<ASTContext &>(*Context),
Context->getTranslationUnitDecl(),
/*Inline*/false, SourceLocation(),
SourceLocation(), &Context->Idents.get("std"),
/*PrevDecl*/ nullptr);
NS->setImplicit();
VaListDecl->setDeclContext(NS);
}
VaListDecl->startDefinition();
// void * __ap;
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get("__ap"),
Context->getPointerType(Context->VoidTy),
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListDecl->addDecl(Field);
// };
VaListDecl->completeDefinition();
Context->VaListTagDecl = VaListDecl;
// typedef struct __va_list __builtin_va_list;
QualType T = Context->getRecordType(VaListDecl);
return Context->buildImplicitTypedef(T, "__builtin_va_list");
}
static TypedefDecl *
CreateSystemZBuiltinVaListDecl(const ASTContext *Context) {
// struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 4;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// long __gpr;
FieldTypes[0] = Context->LongTy;
FieldNames[0] = "__gpr";
// long __fpr;
FieldTypes[1] = Context->LongTy;
FieldNames[1] = "__fpr";
// void *__overflow_arg_area;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_arg_area";
// void *__reg_save_area;
FieldTypes[3] = Context->getPointerType(Context->VoidTy);
FieldNames[3] = "__reg_save_area";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
VaListTagDecl,
SourceLocation(),
SourceLocation(),
&Context->Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// };
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateHexagonBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
VaListTagDecl = Context->buildImplicitRecord("__va_list_tag");
VaListTagDecl->startDefinition();
const size_t NumFields = 3;
QualType FieldTypes[NumFields];
const char *FieldNames[NumFields];
// void *CurrentSavedRegisterArea;
FieldTypes[0] = Context->getPointerType(Context->VoidTy);
FieldNames[0] = "__current_saved_reg_area_pointer";
// void *SavedRegAreaEnd;
FieldTypes[1] = Context->getPointerType(Context->VoidTy);
FieldNames[1] = "__saved_reg_area_end_pointer";
// void *OverflowArea;
FieldTypes[2] = Context->getPointerType(Context->VoidTy);
FieldNames[2] = "__overflow_area_pointer";
// Create fields
for (unsigned i = 0; i < NumFields; ++i) {
FieldDecl *Field = FieldDecl::Create(
const_cast<ASTContext &>(*Context), VaListTagDecl, SourceLocation(),
SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i],
/*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
Field->setAccess(AS_public);
VaListTagDecl->addDecl(Field);
}
VaListTagDecl->completeDefinition();
Context->VaListTagDecl = VaListTagDecl;
QualType VaListTagType = Context->getRecordType(VaListTagDecl);
// } __va_list_tag;
TypedefDecl *VaListTagTypedefDecl =
Context->buildImplicitTypedef(VaListTagType, "__va_list_tag");
QualType VaListTagTypedefType = Context->getTypedefType(VaListTagTypedefDecl);
// typedef __va_list_tag __builtin_va_list[1];
llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1);
QualType VaListTagArrayType = Context->getConstantArrayType(
VaListTagTypedefType, Size, nullptr, ArrayType::Normal, 0);
return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list");
}
static TypedefDecl *CreateVaListDecl(const ASTContext *Context,
TargetInfo::BuiltinVaListKind Kind) {
switch (Kind) {
case TargetInfo::CharPtrBuiltinVaList:
return CreateCharPtrBuiltinVaListDecl(Context);
case TargetInfo::VoidPtrBuiltinVaList:
return CreateVoidPtrBuiltinVaListDecl(Context);
case TargetInfo::AArch64ABIBuiltinVaList:
return CreateAArch64ABIBuiltinVaListDecl(Context);
case TargetInfo::PowerABIBuiltinVaList:
return CreatePowerABIBuiltinVaListDecl(Context);
case TargetInfo::X86_64ABIBuiltinVaList:
return CreateX86_64ABIBuiltinVaListDecl(Context);
case TargetInfo::PNaClABIBuiltinVaList:
return CreatePNaClABIBuiltinVaListDecl(Context);
case TargetInfo::AAPCSABIBuiltinVaList:
return CreateAAPCSABIBuiltinVaListDecl(Context);
case TargetInfo::SystemZBuiltinVaList:
return CreateSystemZBuiltinVaListDecl(Context);
case TargetInfo::HexagonBuiltinVaList:
return CreateHexagonBuiltinVaListDecl(Context);
}
llvm_unreachable("Unhandled __builtin_va_list type kind");
}
TypedefDecl *ASTContext::getBuiltinVaListDecl() const {
if (!BuiltinVaListDecl) {
BuiltinVaListDecl = CreateVaListDecl(this, Target->getBuiltinVaListKind());
assert(BuiltinVaListDecl->isImplicit());
}
return BuiltinVaListDecl;
}
Decl *ASTContext::getVaListTagDecl() const {
// Force the creation of VaListTagDecl by building the __builtin_va_list
// declaration.
if (!VaListTagDecl)
(void)getBuiltinVaListDecl();
return VaListTagDecl;
}
TypedefDecl *ASTContext::getBuiltinMSVaListDecl() const {
if (!BuiltinMSVaListDecl)
BuiltinMSVaListDecl = CreateMSVaListDecl(this);
return BuiltinMSVaListDecl;
}
bool ASTContext::canBuiltinBeRedeclared(const FunctionDecl *FD) const {
return BuiltinInfo.canBeRedeclared(FD->getBuiltinID());
}
void ASTContext::setObjCConstantStringInterface(ObjCInterfaceDecl *Decl) {
assert(ObjCConstantStringType.isNull() &&
"'NSConstantString' type already set!");
ObjCConstantStringType = getObjCInterfaceType(Decl);
}
/// Retrieve the template name that corresponds to a non-empty
/// lookup.
TemplateName
ASTContext::getOverloadedTemplateName(UnresolvedSetIterator Begin,
UnresolvedSetIterator End) const {
unsigned size = End - Begin;
assert(size > 1 && "set is not overloaded!");
void *memory = Allocate(sizeof(OverloadedTemplateStorage) +
size * sizeof(FunctionTemplateDecl*));
auto *OT = new (memory) OverloadedTemplateStorage(size);
NamedDecl **Storage = OT->getStorage();
for (UnresolvedSetIterator I = Begin; I != End; ++I) {
NamedDecl *D = *I;
assert(isa<FunctionTemplateDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D) ||
(isa<UsingShadowDecl>(D) &&
isa<FunctionTemplateDecl>(D->getUnderlyingDecl())));
*Storage++ = D;
}
return TemplateName(OT);
}
/// Retrieve a template name representing an unqualified-id that has been
/// assumed to name a template for ADL purposes.
TemplateName ASTContext::getAssumedTemplateName(DeclarationName Name) const {
auto *OT = new (*this) AssumedTemplateStorage(Name);
return TemplateName(OT);
}
/// Retrieve the template name that represents a qualified
/// template name such as \c std::vector.
TemplateName
ASTContext::getQualifiedTemplateName(NestedNameSpecifier *NNS,
bool TemplateKeyword,
TemplateDecl *Template) const {
assert(NNS && "Missing nested-name-specifier in qualified template name");
// FIXME: Canonicalization?
llvm::FoldingSetNodeID ID;
QualifiedTemplateName::Profile(ID, NNS, TemplateKeyword, Template);
void *InsertPos = nullptr;
QualifiedTemplateName *QTN =
QualifiedTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (!QTN) {
QTN = new (*this, alignof(QualifiedTemplateName))
QualifiedTemplateName(NNS, TemplateKeyword, Template);
QualifiedTemplateNames.InsertNode(QTN, InsertPos);
}
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template apply.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
const IdentifierInfo *Name) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Name);
void *InsertPos = nullptr;
DependentTemplateName *QTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Name);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Name, Canon);
DependentTemplateName *CheckQTN =
DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent type name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
/// Retrieve the template name that represents a dependent
/// template name such as \c MetaFun::template operator+.
TemplateName
ASTContext::getDependentTemplateName(NestedNameSpecifier *NNS,
OverloadedOperatorKind Operator) const {
assert((!NNS || NNS->isDependent()) &&
"Nested name specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateName::Profile(ID, NNS, Operator);
void *InsertPos = nullptr;
DependentTemplateName *QTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
if (QTN)
return TemplateName(QTN);
NestedNameSpecifier *CanonNNS = getCanonicalNestedNameSpecifier(NNS);
if (CanonNNS == NNS) {
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator);
} else {
TemplateName Canon = getDependentTemplateName(CanonNNS, Operator);
QTN = new (*this, alignof(DependentTemplateName))
DependentTemplateName(NNS, Operator, Canon);
DependentTemplateName *CheckQTN
= DependentTemplateNames.FindNodeOrInsertPos(ID, InsertPos);
assert(!CheckQTN && "Dependent template name canonicalization broken");
(void)CheckQTN;
}
DependentTemplateNames.InsertNode(QTN, InsertPos);
return TemplateName(QTN);
}
TemplateName
ASTContext::getSubstTemplateTemplateParm(TemplateTemplateParmDecl *param,
TemplateName replacement) const {
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmStorage::Profile(ID, param, replacement);
void *insertPos = nullptr;
SubstTemplateTemplateParmStorage *subst
= SubstTemplateTemplateParms.FindNodeOrInsertPos(ID, insertPos);
if (!subst) {
subst = new (*this) SubstTemplateTemplateParmStorage(param, replacement);
SubstTemplateTemplateParms.InsertNode(subst, insertPos);
}
return TemplateName(subst);
}
TemplateName
ASTContext::getSubstTemplateTemplateParmPack(TemplateTemplateParmDecl *Param,
const TemplateArgument &ArgPack) const {
auto &Self = const_cast<ASTContext &>(*this);
llvm::FoldingSetNodeID ID;
SubstTemplateTemplateParmPackStorage::Profile(ID, Self, Param, ArgPack);
void *InsertPos = nullptr;
SubstTemplateTemplateParmPackStorage *Subst
= SubstTemplateTemplateParmPacks.FindNodeOrInsertPos(ID, InsertPos);
if (!Subst) {
Subst = new (*this) SubstTemplateTemplateParmPackStorage(Param,
ArgPack.pack_size(),
ArgPack.pack_begin());
SubstTemplateTemplateParmPacks.InsertNode(Subst, InsertPos);
}
return TemplateName(Subst);
}
/// getFromTargetType - Given one of the integer types provided by
/// TargetInfo, produce the corresponding type. The unsigned @p Type
/// is actually a value of type @c TargetInfo::IntType.
CanQualType ASTContext::getFromTargetType(unsigned Type) const {
switch (Type) {
case TargetInfo::NoInt: return {};
case TargetInfo::SignedChar: return SignedCharTy;
case TargetInfo::UnsignedChar: return UnsignedCharTy;
case TargetInfo::SignedShort: return ShortTy;
case TargetInfo::UnsignedShort: return UnsignedShortTy;
case TargetInfo::SignedInt: return IntTy;
case TargetInfo::UnsignedInt: return UnsignedIntTy;
case TargetInfo::SignedLong: return LongTy;
case TargetInfo::UnsignedLong: return UnsignedLongTy;
case TargetInfo::SignedLongLong: return LongLongTy;
case TargetInfo::UnsignedLongLong: return UnsignedLongLongTy;
}
llvm_unreachable("Unhandled TargetInfo::IntType value");
}
//===----------------------------------------------------------------------===//
// Type Predicates.
//===----------------------------------------------------------------------===//
/// getObjCGCAttrKind - Returns one of GCNone, Weak, or Strong: the Objective-C
/// garbage collection attribute for the given type.
///
Qualifiers::GC ASTContext::getObjCGCAttrKind(QualType Ty) const {
if (getLangOpts().getGC() == LangOptions::NonGC)
return Qualifiers::GCNone;
assert(getLangOpts().ObjC);
Qualifiers::GC GCAttrs = Ty.getObjCGCAttr();
// Default behaviour under Objective-C's GC is for ObjC pointers
// (or pointers to them) to be treated as though they were declared
// as __strong.
if (GCAttrs == Qualifiers::GCNone) {
if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType())
return Qualifiers::Strong;
else if (Ty->isPointerType())
return getObjCGCAttrKind(Ty->castAs<PointerType>()->getPointeeType());
} else {
// It's not valid to set GC attributes on anything that isn't a
// pointer.
#ifndef NDEBUG
QualType CT = Ty->getCanonicalTypeInternal();
while (const auto *AT = dyn_cast<ArrayType>(CT))
CT = AT->getElementType();
assert(CT->isAnyPointerType() || CT->isBlockPointerType());
#endif
}
return GCAttrs;
}
//===----------------------------------------------------------------------===//
// Type Compatibility Testing
//===----------------------------------------------------------------------===//
/// areCompatVectorTypes - Return true if the two specified vector types are
/// compatible.
static bool areCompatVectorTypes(const VectorType *LHS,
const VectorType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumElements() == RHS->getNumElements();
}
/// areCompatMatrixTypes - Return true if the two specified matrix types are
/// compatible.
static bool areCompatMatrixTypes(const ConstantMatrixType *LHS,
const ConstantMatrixType *RHS) {
assert(LHS->isCanonicalUnqualified() && RHS->isCanonicalUnqualified());
return LHS->getElementType() == RHS->getElementType() &&
LHS->getNumRows() == RHS->getNumRows() &&
LHS->getNumColumns() == RHS->getNumColumns();
}
bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
QualType SecondVec) {
assert(FirstVec->isVectorType() && "FirstVec should be a vector type");
assert(SecondVec->isVectorType() && "SecondVec should be a vector type");
if (hasSameUnqualifiedType(FirstVec, SecondVec))
return true;
// Treat Neon vector types and most AltiVec vector types as if they are the
// equivalent GCC vector types.
const auto *First = FirstVec->castAs<VectorType>();
const auto *Second = SecondVec->castAs<VectorType>();
if (First->getNumElements() == Second->getNumElements() &&
hasSameType(First->getElementType(), Second->getElementType()) &&
First->getVectorKind() != VectorType::AltiVecPixel &&
First->getVectorKind() != VectorType::AltiVecBool &&
Second->getVectorKind() != VectorType::AltiVecPixel &&
Second->getVectorKind() != VectorType::AltiVecBool &&
First->getVectorKind() != VectorType::SveFixedLengthDataVector &&
First->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
Second->getVectorKind() != VectorType::SveFixedLengthDataVector &&
Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector)
return true;
return false;
}
/// getSVETypeSize - Return SVE vector or predicate register size.
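/// For illustration only: with -msve-vector-bits=512 (ArmSveVectorBits == 512),
/// fixed-length SVE data vectors such as
/// @code
///   typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
///   typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));
/// @endcode
/// occupy 512 bits, while the svbool_t-based predicate occupies 512 / 8 = 64
/// bits, since predicates carry one bit per byte of the data vector.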
static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) {
assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type");
return Ty->getKind() == BuiltinType::SveBool
? Context.getLangOpts().ArmSveVectorBits / Context.getCharWidth()
: Context.getLangOpts().ArmSveVectorBits;
}
bool ASTContext::areCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
if (const auto *BT = FirstType->getAs<BuiltinType>()) {
if (const auto *VT = SecondType->getAs<VectorType>()) {
// Predicates have the same representation as uint8, so we also have to
// check the vector kind to keep these types incompatible.
if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return BT->getKind() == BuiltinType::SveBool;
else if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector)
return VT->getElementType().getCanonicalType() ==
FirstType->getSveEltType(*this);
else if (VT->getVectorKind() == VectorType::GenericVector)
return getTypeSize(SecondType) == getSVETypeSize(*this, BT) &&
hasSameType(VT->getElementType(),
getBuiltinVectorTypeInfo(BT).ElementType);
}
}
return false;
};
return IsValidCast(FirstType, SecondType) ||
IsValidCast(SecondType, FirstType);
}
bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
QualType SecondType) {
assert(((FirstType->isSizelessBuiltinType() && SecondType->isVectorType()) ||
(FirstType->isVectorType() && SecondType->isSizelessBuiltinType())) &&
"Expected SVE builtin type and vector type!");
auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
const auto *BT = FirstType->getAs<BuiltinType>();
if (!BT)
return false;
const auto *VecTy = SecondType->getAs<VectorType>();
if (VecTy &&
(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VecTy->getVectorKind() == VectorType::GenericVector)) {
const LangOptions::LaxVectorConversionKind LVCKind =
getLangOpts().getLaxVectorConversions();
// Cannot convert between SVE predicates and SVE vectors because of
// their different sizes.
if (BT->getKind() == BuiltinType::SveBool &&
VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector)
return false;
// If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
// "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
// converts to VLAT and VLAT implicitly converts to GNUT."
// ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
// predicates.
if (VecTy->getVectorKind() == VectorType::GenericVector &&
getTypeSize(SecondType) != getSVETypeSize(*this, BT))
return false;
// If -flax-vector-conversions=all is specified, the types are
// certainly compatible.
if (LVCKind == LangOptions::LaxVectorConversionKind::All)
return true;
// If -flax-vector-conversions=integer is specified, the types are
// compatible if the elements are integer types.
if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
FirstType->getSveEltType(*this)->isIntegerType();
}
return false;
};
return IsLaxCompatible(FirstType, SecondType) ||
IsLaxCompatible(SecondType, FirstType);
}
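/// Return true if the given type has an explicitly written ObjC ownership
/// qualifier (e.g. __strong) attached directly to it, looking through
/// parentheses and other type attributes but not through typedefs or other
/// sugar.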
bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
while (true) {
// __strong id
if (const AttributedType *Attr = dyn_cast<AttributedType>(Ty)) {
if (Attr->getAttrKind() == attr::ObjCOwnership)
return true;
Ty = Attr->getModifiedType();
// X *__strong (...)
} else if (const ParenType *Paren = dyn_cast<ParenType>(Ty)) {
Ty = Paren->getInnerType();
// We do not want to look through typedefs, typeof(expr),
// typeof(type), or any other way that the type is somehow
// abstracted.
} else {
return false;
}
}
}
//===----------------------------------------------------------------------===//
// ObjCQualifiedIdTypesAreCompatible - Compatibility testing for qualified id's.
//===----------------------------------------------------------------------===//
/// ProtocolCompatibleWithProtocol - return 'true' if 'lProto' is in the
/// inheritance hierarchy of 'rProto'.
bool
ASTContext::ProtocolCompatibleWithProtocol(ObjCProtocolDecl *lProto,
ObjCProtocolDecl *rProto) const {
if (declaresSameEntity(lProto, rProto))
return true;
for (auto *PI : rProto->protocols())
if (ProtocolCompatibleWithProtocol(lProto, PI))
return true;
return false;
}
/// ObjCQualifiedClassTypesAreCompatible - compare Class<pr,...> and
/// Class<pr1, ...>.
bool ASTContext::ObjCQualifiedClassTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs) {
for (auto *lhsProto : lhs->quals()) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto)) {
match = true;
break;
}
}
if (!match)
return false;
}
return true;
}
/// ObjCQualifiedIdTypesAreCompatible - We know that one of lhs/rhs is an
/// ObjCQualifiedIDType.
bool ASTContext::ObjCQualifiedIdTypesAreCompatible(
const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs,
bool compare) {
// Allow id<P..> and an 'id' in all cases.
if (lhs->isObjCIdType() || rhs->isObjCIdType())
return true;
// Don't allow id<P..> to convert to Class or Class<P..> in either direction.
if (lhs->isObjCClassType() || lhs->isObjCQualifiedClassType() ||
rhs->isObjCClassType() || rhs->isObjCQualifiedClassType())
return false;
if (lhs->isObjCQualifiedIdType()) {
if (rhs->qual_empty()) {
// If the RHS is an unqualified interface pointer "NSString*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
if (!rhsID->ClassImplementsProtocol(I, true))
return false;
}
}
// If there are no qualifiers and no interface, we have an 'id'.
return true;
}
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
// If the RHS is a qualified interface pointer "NSString<P>*",
// make sure we check the class hierarchy.
if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) {
for (auto *I : lhs->quals()) {
// when comparing an id<P> on lhs with a static type on rhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
if (rhsID->ClassImplementsProtocol(I, true)) {
match = true;
break;
}
}
}
if (!match)
return false;
}
return true;
}
assert(rhs->isObjCQualifiedIdType() && "One of the LHS/RHS should be id<x>");
if (lhs->getInterfaceType()) {
// Both the right and left sides have qualifiers.
for (auto *lhsProto : lhs->quals()) {
bool match = false;
// when comparing an id<P> on rhs with a static type on lhs,
// see if static class implements all of id's protocols, directly or
// through its super class and categories.
// First, lhs protocols in the qualifier list must be found, direct
// or indirect in rhs's qualifier list or it is a mismatch.
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
// Static class's protocols, or its super class or category protocols
// must be found, direct or indirect in rhs's qualifier list or it is a mismatch.
if (ObjCInterfaceDecl *lhsID = lhs->getInterfaceDecl()) {
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSInheritedProtocols;
CollectInheritedProtocols(lhsID, LHSInheritedProtocols);
// This is rather dubious but matches gcc's behavior. If lhs has
// no type qualifier and its class has no static protocol(s),
// assume that it is a mismatch.
if (LHSInheritedProtocols.empty() && lhs->qual_empty())
return false;
for (auto *lhsProto : LHSInheritedProtocols) {
bool match = false;
for (auto *rhsProto : rhs->quals()) {
if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) ||
(compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) {
match = true;
break;
}
}
if (!match)
return false;
}
}
return true;
}
return false;
}
/// canAssignObjCInterfaces - Return true if the two interface types are
/// compatible for assignment from RHS to LHS. This handles validation of any
/// protocol qualifiers on the LHS or RHS.
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
// If either type represents the built-in 'id' type, return true.
if (LHS->isObjCUnqualifiedId() || RHS->isObjCUnqualifiedId())
return true;
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
if (!RHS->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfaces(RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this));
};
// Casts from or to id<P> are allowed when the other side has compatible
// protocols.
if (LHS->isObjCQualifiedId() || RHS->isObjCQualifiedId()) {
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false));
}
// Verify protocol compatibility for casts from Class<P1> to Class<P2>.
if (LHS->isObjCQualifiedClass() && RHS->isObjCQualifiedClass()) {
return finish(ObjCQualifiedClassTypesAreCompatible(LHSOPT, RHSOPT));
}
// Casts from Class to Class<Foo>, or vice-versa, are allowed.
if (LHS->isObjCClass() && RHS->isObjCClass()) {
return true;
}
// If we have 2 user-defined types, fall into that path.
if (LHS->getInterface() && RHS->getInterface()) {
return finish(canAssignObjCInterfaces(LHS, RHS));
}
return false;
}
/// canAssignObjCInterfacesInBlockPointer - This routine is specifically written
/// for providing type-safety for objective-c pointers used to pass/return
/// arguments in block literals. When passed as arguments, passing 'A*' where
/// 'id' is expected is not OK. Passing 'Sub *' where 'Super *' is expected is
/// not OK. For the return type, the opposite is not OK.
bool ASTContext::canAssignObjCInterfacesInBlockPointer(
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
bool BlockReturnType) {
// Function object that propagates a successful result or handles
// __kindof types.
auto finish = [&](bool succeeded) -> bool {
if (succeeded)
return true;
const ObjCObjectPointerType *Expected = BlockReturnType ? RHSOPT : LHSOPT;
if (!Expected->isKindOfType())
return false;
// Strip off __kindof and protocol qualifiers, then check whether
// we can assign the other way.
return canAssignObjCInterfacesInBlockPointer(
RHSOPT->stripObjCKindOfTypeAndQuals(*this),
LHSOPT->stripObjCKindOfTypeAndQuals(*this),
BlockReturnType);
};
if (RHSOPT->isObjCBuiltinType() || LHSOPT->isObjCIdType())
return true;
if (LHSOPT->isObjCBuiltinType()) {
return finish(RHSOPT->isObjCBuiltinType() ||
RHSOPT->isObjCQualifiedIdType());
}
if (LHSOPT->isObjCQualifiedIdType() || RHSOPT->isObjCQualifiedIdType()) {
if (getLangOpts().CompatibilityQualifiedIdBlockParamTypeChecking)
// For block parameters, use the previous type checking for compatibility.
return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false) ||
// Or corrected type checking as in non-compat mode.
(!BlockReturnType &&
ObjCQualifiedIdTypesAreCompatible(RHSOPT, LHSOPT, false)));
else
return finish(ObjCQualifiedIdTypesAreCompatible(
(BlockReturnType ? LHSOPT : RHSOPT),
(BlockReturnType ? RHSOPT : LHSOPT), false));
}
const ObjCInterfaceType* LHS = LHSOPT->getInterfaceType();
const ObjCInterfaceType* RHS = RHSOPT->getInterfaceType();
if (LHS && RHS) { // We have 2 user-defined types.
if (LHS != RHS) {
if (LHS->getDecl()->isSuperClassOf(RHS->getDecl()))
return finish(BlockReturnType);
if (RHS->getDecl()->isSuperClassOf(LHS->getDecl()))
return finish(!BlockReturnType);
}
else
return true;
}
return false;
}
/// Comparison routine for Objective-C protocols to be used with
/// llvm::array_pod_sort.
static int compareObjCProtocolsByName(ObjCProtocolDecl * const *lhs,
ObjCProtocolDecl * const *rhs) {
return (*lhs)->getName().compare((*rhs)->getName());
}
/// getIntersectionOfProtocols - This routine finds the intersection of the
/// sets of protocols inherited from two distinct Objective-C pointer objects
/// with the given common base.
/// It is used to build the composite qualifier list of the composite type of
/// the conditional expression involving two Objective-C pointer objects.
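/// Illustrative example (hypothetical classes and protocols): for
/// 'cond ? (Sub1<P, Q> *)x : (Sub2<P, R> *)y' with common base 'Base', the
/// composite qualifier list is the intersection {P}, minus any protocols
/// that 'Base' itself already adopts.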
static
void getIntersectionOfProtocols(ASTContext &Context,
const ObjCInterfaceDecl *CommonBase,
const ObjCObjectPointerType *LHSOPT,
const ObjCObjectPointerType *RHSOPT,
SmallVectorImpl<ObjCProtocolDecl *> &IntersectionSet) {
const ObjCObjectType* LHS = LHSOPT->getObjectType();
const ObjCObjectType* RHS = RHSOPT->getObjectType();
assert(LHS->getInterface() && "LHS must have an interface base");
assert(RHS->getInterface() && "RHS must have an interface base");
// Add all of the protocols for the LHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> LHSProtocolSet;
// Start with the protocol qualifiers.
for (auto proto : LHS->quals()) {
Context.CollectInheritedProtocols(proto, LHSProtocolSet);
}
// Also add the protocols associated with the LHS interface.
Context.CollectInheritedProtocols(LHS->getInterface(), LHSProtocolSet);
// Add all of the protocols for the RHS.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> RHSProtocolSet;
// Start with the protocol qualifiers.
for (auto proto : RHS->quals()) {
Context.CollectInheritedProtocols(proto, RHSProtocolSet);
}
// Also add the protocols associated with the RHS interface.
Context.CollectInheritedProtocols(RHS->getInterface(), RHSProtocolSet);
// Compute the intersection of the collected protocol sets.
for (auto proto : LHSProtocolSet) {
if (RHSProtocolSet.count(proto))
IntersectionSet.push_back(proto);
}
// Compute the set of protocols that is implied by either the common type or
// the protocols within the intersection.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> ImpliedProtocols;
Context.CollectInheritedProtocols(CommonBase, ImpliedProtocols);
// Remove any implied protocols from the list of inherited protocols.
if (!ImpliedProtocols.empty()) {
IntersectionSet.erase(
std::remove_if(IntersectionSet.begin(),
IntersectionSet.end(),
[&](ObjCProtocolDecl *proto) -> bool {
return ImpliedProtocols.count(proto) > 0;
}),
IntersectionSet.end());
}
// Sort the remaining protocols by name.
llvm::array_pod_sort(IntersectionSet.begin(), IntersectionSet.end(),
compareObjCProtocolsByName);
}
/// Determine whether the first type is a subtype of the second.
static bool canAssignObjCObjectTypes(ASTContext &ctx, QualType lhs,
QualType rhs) {
// Common case: two object pointers.
const auto *lhsOPT = lhs->getAs<ObjCObjectPointerType>();
const auto *rhsOPT = rhs->getAs<ObjCObjectPointerType>();
if (lhsOPT && rhsOPT)
return ctx.canAssignObjCInterfaces(lhsOPT, rhsOPT);
// Two block pointers.
const auto *lhsBlock = lhs->getAs<BlockPointerType>();
const auto *rhsBlock = rhs->getAs<BlockPointerType>();
if (lhsBlock && rhsBlock)
return ctx.typesAreBlockPointerCompatible(lhs, rhs);
// If either is an unqualified 'id' and the other is a block, it's
// acceptable.
if ((lhsOPT && lhsOPT->isObjCIdType() && rhsBlock) ||
(rhsOPT && rhsOPT->isObjCIdType() && lhsBlock))
return true;
return false;
}
// Check that the given Objective-C type argument lists are equivalent.
static bool sameObjCTypeArgs(ASTContext &ctx,
const ObjCInterfaceDecl *iface,
ArrayRef<QualType> lhsArgs,
ArrayRef<QualType> rhsArgs,
bool stripKindOf) {
if (lhsArgs.size() != rhsArgs.size())
return false;
ObjCTypeParamList *typeParams = iface->getTypeParamList();
for (unsigned i = 0, n = lhsArgs.size(); i != n; ++i) {
if (ctx.hasSameType(lhsArgs[i], rhsArgs[i]))
continue;
switch (typeParams->begin()[i]->getVariance()) {
case ObjCTypeParamVariance::Invariant:
if (!stripKindOf ||
!ctx.hasSameType(lhsArgs[i].stripObjCKindOfType(ctx),
rhsArgs[i].stripObjCKindOfType(ctx))) {
return false;
}
break;
case ObjCTypeParamVariance::Covariant:
if (!canAssignObjCObjectTypes(ctx, lhsArgs[i], rhsArgs[i]))
return false;
break;
case ObjCTypeParamVariance::Contravariant:
if (!canAssignObjCObjectTypes(ctx, rhsArgs[i], lhsArgs[i]))
return false;
break;
}
}
return true;
}
QualType ASTContext::areCommonBaseCompatible(
const ObjCObjectPointerType *Lptr,
const ObjCObjectPointerType *Rptr) {
const ObjCObjectType *LHS = Lptr->getObjectType();
const ObjCObjectType *RHS = Rptr->getObjectType();
const ObjCInterfaceDecl* LDecl = LHS->getInterface();
const ObjCInterfaceDecl* RDecl = RHS->getInterface();
if (!LDecl || !RDecl)
return {};
// When either LHS or RHS is a kindof type, we should return a kindof type.
// For example, for common base of kindof(ASub1) and kindof(ASub2), we return
// kindof(A).
bool anyKindOf = LHS->isKindOfType() || RHS->isKindOfType();
// Follow the left-hand side up the class hierarchy until we either hit a
// root or find the RHS. Record the ancestors in case we don't find it.
llvm::SmallDenseMap<const ObjCInterfaceDecl *, const ObjCObjectType *, 4>
LHSAncestors;
while (true) {
// Record this ancestor. We'll need this if the common type isn't in the
// path from the LHS to the root.
LHSAncestors[LHS->getInterface()->getCanonicalDecl()] = LHS;
if (declaresSameEntity(LHS->getInterface(), RDecl)) {
// Get the type arguments.
ArrayRef<QualType> LHSTypeArgs = LHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
LHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, LHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If anything in the LHS will have changed, build a new result type.
// If we need to return a kindof type but LHS is not a kindof type, we
// build a new result type.
if (anyChanges || LHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(LHS->getInterface());
Result = getObjCObjectType(Result, LHSTypeArgs, Protocols,
anyKindOf || LHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(LHS, 0));
}
// Find the superclass.
QualType LHSSuperType = LHS->getSuperClassType();
if (LHSSuperType.isNull())
break;
LHS = LHSSuperType->castAs<ObjCObjectType>();
}
// We didn't find anything by following the LHS to its root; now check
// the RHS against the cached set of ancestors.
while (true) {
auto KnownLHS = LHSAncestors.find(RHS->getInterface()->getCanonicalDecl());
if (KnownLHS != LHSAncestors.end()) {
LHS = KnownLHS->second;
// Get the type arguments.
ArrayRef<QualType> RHSTypeArgs = RHS->getTypeArgsAsWritten();
bool anyChanges = false;
if (LHS->isSpecialized() && RHS->isSpecialized()) {
// Both have type arguments, compare them.
if (!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHS->getTypeArgs(),
/*stripKindOf=*/true))
return {};
} else if (LHS->isSpecialized() != RHS->isSpecialized()) {
// If only one has type arguments, the result will not have type
// arguments.
RHSTypeArgs = {};
anyChanges = true;
}
// Compute the intersection of protocols.
SmallVector<ObjCProtocolDecl *, 8> Protocols;
getIntersectionOfProtocols(*this, RHS->getInterface(), Lptr, Rptr,
Protocols);
if (!Protocols.empty())
anyChanges = true;
// If we need to return a kindof type but RHS is not a kindof type, we
// build a new result type.
if (anyChanges || RHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(RHS->getInterface());
Result = getObjCObjectType(Result, RHSTypeArgs, Protocols,
anyKindOf || RHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
return getObjCObjectPointerType(QualType(RHS, 0));
}
// Find the superclass of the RHS.
QualType RHSSuperType = RHS->getSuperClassType();
if (RHSSuperType.isNull())
break;
RHS = RHSSuperType->castAs<ObjCObjectType>();
}
return {};
}
bool ASTContext::canAssignObjCInterfaces(const ObjCObjectType *LHS,
const ObjCObjectType *RHS) {
assert(LHS->getInterface() && "LHS is not an interface type");
assert(RHS->getInterface() && "RHS is not an interface type");
// Verify that the base decls are compatible: the RHS must be a subclass of
// the LHS.
ObjCInterfaceDecl *LHSInterface = LHS->getInterface();
bool IsSuperClass = LHSInterface->isSuperClassOf(RHS->getInterface());
if (!IsSuperClass)
return false;
// If the LHS has protocol qualifiers, determine whether all of them are
// satisfied by the RHS (i.e., the RHS has a superset of the protocols in the
// LHS).
if (LHS->getNumProtocols() > 0) {
// It is OK if conversion of LHS to SuperClass results in narrowing of types;
// i.e., SuperClass may implement at least one of the protocols
// in LHS's protocol list. For example, SuperObj<P1> = lhs<P1,P2> is OK,
// but SuperObj<P1,P2,P3> = lhs<P1,P2> is not.
llvm::SmallPtrSet<ObjCProtocolDecl *, 8> SuperClassInheritedProtocols;
CollectInheritedProtocols(RHS->getInterface(), SuperClassInheritedProtocols);
// Also, if RHS has explicit qualifiers, include them for comparison with
// LHS's qualifiers.
for (auto *RHSPI : RHS->quals())
CollectInheritedProtocols(RHSPI, SuperClassInheritedProtocols);
// If there are no protocols associated with RHS, it is not a match.
if (SuperClassInheritedProtocols.empty())
return false;
for (const auto *LHSProto : LHS->quals()) {
bool SuperImplementsProtocol = false;
for (auto *SuperClassProto : SuperClassInheritedProtocols)
if (SuperClassProto->lookupProtocolNamed(LHSProto->getIdentifier())) {
SuperImplementsProtocol = true;
break;
}
if (!SuperImplementsProtocol)
return false;
}
}
// If the LHS is specialized, we may need to check type arguments.
if (LHS->isSpecialized()) {
// Follow the superclass chain until we've matched the LHS class in the
// hierarchy. This substitutes type arguments through.
const ObjCObjectType *RHSSuper = RHS;
while (!declaresSameEntity(RHSSuper->getInterface(), LHSInterface))
RHSSuper = RHSSuper->getSuperClassType()->castAs<ObjCObjectType>();
// If the RHS is specialized, compare type arguments.
if (RHSSuper->isSpecialized() &&
!sameObjCTypeArgs(*this, LHS->getInterface(),
LHS->getTypeArgs(), RHSSuper->getTypeArgs(),
/*stripKindOf=*/true)) {
return false;
}
}
return true;
}
bool ASTContext::areComparableObjCPointerTypes(QualType LHS, QualType RHS) {
// get the "pointed to" types
const auto *LHSOPT = LHS->getAs<ObjCObjectPointerType>();
const auto *RHSOPT = RHS->getAs<ObjCObjectPointerType>();
if (!LHSOPT || !RHSOPT)
return false;
return canAssignObjCInterfaces(LHSOPT, RHSOPT) ||
canAssignObjCInterfaces(RHSOPT, LHSOPT);
}
bool ASTContext::canBindObjCObjectType(QualType To, QualType From) {
return canAssignObjCInterfaces(
getObjCObjectPointerType(To)->castAs<ObjCObjectPointerType>(),
getObjCObjectPointerType(From)->castAs<ObjCObjectPointerType>());
}
/// typesAreCompatible - C99 6.7.3p9: For two qualified types to be compatible,
/// both shall have the identically qualified version of a compatible type.
/// C99 6.2.7p1: Two types have compatible types if their types are the
/// same. See 6.7.[2,3,5] for additional rules.
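/// For illustration: in C, 'int[]' and 'int[10]' are compatible, as are
/// 'int (*)(int)' and 'int (*)()', while 'int' and 'long' are not, even on
/// targets where they have the same width.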
bool ASTContext::typesAreCompatible(QualType LHS, QualType RHS,
bool CompareUnqualified) {
if (getLangOpts().CPlusPlus)
return hasSameType(LHS, RHS);
return !mergeTypes(LHS, RHS, false, CompareUnqualified).isNull();
}
bool ASTContext::propertyTypesAreCompatible(QualType LHS, QualType RHS) {
return typesAreCompatible(LHS, RHS);
}
bool ASTContext::typesAreBlockPointerCompatible(QualType LHS, QualType RHS) {
return !mergeTypes(LHS, RHS, true).isNull();
}
/// mergeTransparentUnionType - if T is a transparent union type and a member
/// of T is compatible with SubType, return the merged type, else return
/// QualType()
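/// Illustrative example of the GNU extension (hypothetical declaration):
///   typedef union { int *ip; void *vp; }
///       __attribute__((transparent_union)) arg_t;
/// Passing an 'int *' where an 'arg_t' parameter is expected merges against
/// the compatible union member.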
QualType ASTContext::mergeTransparentUnionType(QualType T, QualType SubType,
bool OfBlockPointer,
bool Unqualified) {
if (const RecordType *UT = T->getAsUnionType()) {
RecordDecl *UD = UT->getDecl();
if (UD->hasAttr<TransparentUnionAttr>()) {
for (const auto *I : UD->fields()) {
QualType ET = I->getType().getUnqualifiedType();
QualType MT = mergeTypes(ET, SubType, OfBlockPointer, Unqualified);
if (!MT.isNull())
return MT;
}
}
}
return {};
}
/// mergeFunctionParameterTypes - merge two types which appear as function
/// parameter types
QualType ASTContext::mergeFunctionParameterTypes(QualType lhs, QualType rhs,
bool OfBlockPointer,
bool Unqualified) {
// GNU extension: two types are compatible if they appear as a function
// argument, one of the types is a transparent union type, and the other
// type is compatible with a union member.
QualType lmerge = mergeTransparentUnionType(lhs, rhs, OfBlockPointer,
Unqualified);
if (!lmerge.isNull())
return lmerge;
QualType rmerge = mergeTransparentUnionType(rhs, lhs, OfBlockPointer,
Unqualified);
if (!rmerge.isNull())
return rmerge;
return mergeTypes(lhs, rhs, OfBlockPointer, Unqualified);
}
QualType ASTContext::mergeFunctionTypes(QualType lhs, QualType rhs,
bool OfBlockPointer, bool Unqualified,
bool AllowCXX) {
const auto *lbase = lhs->castAs<FunctionType>();
const auto *rbase = rhs->castAs<FunctionType>();
const auto *lproto = dyn_cast<FunctionProtoType>(lbase);
const auto *rproto = dyn_cast<FunctionProtoType>(rbase);
bool allLTypes = true;
bool allRTypes = true;
// Check return type
QualType retType;
if (OfBlockPointer) {
QualType RHS = rbase->getReturnType();
QualType LHS = lbase->getReturnType();
bool UnqualifiedResult = Unqualified;
if (!UnqualifiedResult)
UnqualifiedResult = (!RHS.hasQualifiers() && LHS.hasQualifiers());
retType = mergeTypes(LHS, RHS, true, UnqualifiedResult, true);
}
else
retType = mergeTypes(lbase->getReturnType(), rbase->getReturnType(), false,
Unqualified);
if (retType.isNull())
return {};
if (Unqualified)
retType = retType.getUnqualifiedType();
CanQualType LRetType = getCanonicalType(lbase->getReturnType());
CanQualType RRetType = getCanonicalType(rbase->getReturnType());
if (Unqualified) {
LRetType = LRetType.getUnqualifiedType();
RRetType = RRetType.getUnqualifiedType();
}
if (getCanonicalType(retType) != LRetType)
allLTypes = false;
if (getCanonicalType(retType) != RRetType)
allRTypes = false;
// FIXME: double check this
// FIXME: should we error if lbase->getRegParmAttr() != 0 &&
// rbase->getRegParmAttr() != 0 &&
// lbase->getRegParmAttr() != rbase->getRegParmAttr()?
FunctionType::ExtInfo lbaseInfo = lbase->getExtInfo();
FunctionType::ExtInfo rbaseInfo = rbase->getExtInfo();
// Compatible functions must have compatible calling conventions
if (lbaseInfo.getCC() != rbaseInfo.getCC())
return {};
// Regparm is part of the calling convention.
if (lbaseInfo.getHasRegParm() != rbaseInfo.getHasRegParm())
return {};
if (lbaseInfo.getRegParm() != rbaseInfo.getRegParm())
return {};
if (lbaseInfo.getProducesResult() != rbaseInfo.getProducesResult())
return {};
if (lbaseInfo.getNoCallerSavedRegs() != rbaseInfo.getNoCallerSavedRegs())
return {};
if (lbaseInfo.getNoCfCheck() != rbaseInfo.getNoCfCheck())
return {};
// FIXME: some uses, e.g. conditional exprs, really want this to be 'both'.
bool NoReturn = lbaseInfo.getNoReturn() || rbaseInfo.getNoReturn();
if (lbaseInfo.getNoReturn() != NoReturn)
allLTypes = false;
if (rbaseInfo.getNoReturn() != NoReturn)
allRTypes = false;
FunctionType::ExtInfo einfo = lbaseInfo.withNoReturn(NoReturn);
if (lproto && rproto) { // two C99 style function prototypes
assert((AllowCXX ||
(!lproto->hasExceptionSpec() && !rproto->hasExceptionSpec())) &&
"C++ shouldn't be here");
// Compatible functions must have the same number of parameters
if (lproto->getNumParams() != rproto->getNumParams())
return {};
// Variadic and non-variadic functions aren't compatible
if (lproto->isVariadic() != rproto->isVariadic())
return {};
if (lproto->getMethodQuals() != rproto->getMethodQuals())
return {};
SmallVector<FunctionProtoType::ExtParameterInfo, 4> newParamInfos;
bool canUseLeft, canUseRight;
if (!mergeExtParameterInfo(lproto, rproto, canUseLeft, canUseRight,
newParamInfos))
return {};
if (!canUseLeft)
allLTypes = false;
if (!canUseRight)
allRTypes = false;
// Check parameter type compatibility
SmallVector<QualType, 10> types;
for (unsigned i = 0, n = lproto->getNumParams(); i < n; i++) {
QualType lParamType = lproto->getParamType(i).getUnqualifiedType();
QualType rParamType = rproto->getParamType(i).getUnqualifiedType();
QualType paramType = mergeFunctionParameterTypes(
lParamType, rParamType, OfBlockPointer, Unqualified);
if (paramType.isNull())
return {};
if (Unqualified)
paramType = paramType.getUnqualifiedType();
types.push_back(paramType);
if (Unqualified) {
lParamType = lParamType.getUnqualifiedType();
rParamType = rParamType.getUnqualifiedType();
}
if (getCanonicalType(paramType) != getCanonicalType(lParamType))
allLTypes = false;
if (getCanonicalType(paramType) != getCanonicalType(rParamType))
allRTypes = false;
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = lproto->getExtProtoInfo();
EPI.ExtInfo = einfo;
EPI.ExtParameterInfos =
newParamInfos.empty() ? nullptr : newParamInfos.data();
return getFunctionType(retType, types, EPI);
}
if (lproto) allRTypes = false;
if (rproto) allLTypes = false;
const FunctionProtoType *proto = lproto ? lproto : rproto;
if (proto) {
assert((AllowCXX || !proto->hasExceptionSpec()) && "C++ shouldn't be here");
if (proto->isVariadic())
return {};
// Check that the types are compatible with the types that
// would result from default argument promotions (C99 6.7.5.3p15).
// The only types actually affected are promotable integer
// types and floats, which would be passed as a different
// type depending on whether the prototype is visible.
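// For illustration: 'void f(); void f(short);' cannot be merged, because a
// 'short' argument would be promoted to 'int' when only the unprototyped
// declaration is visible.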
for (unsigned i = 0, n = proto->getNumParams(); i < n; ++i) {
QualType paramTy = proto->getParamType(i);
// Look at the converted type of enum types, since that is the type used
// to pass enum values.
if (const auto *Enum = paramTy->getAs<EnumType>()) {
paramTy = Enum->getDecl()->getIntegerType();
if (paramTy.isNull())
return {};
}
if (paramTy->isPromotableIntegerType() ||
getCanonicalType(paramTy).getUnqualifiedType() == FloatTy)
return {};
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
FunctionProtoType::ExtProtoInfo EPI = proto->getExtProtoInfo();
EPI.ExtInfo = einfo;
return getFunctionType(retType, proto->getParamTypes(), EPI);
}
if (allLTypes) return lhs;
if (allRTypes) return rhs;
return getFunctionNoProtoType(retType, einfo);
}
/// Given that we have an enum type and a non-enum type, try to merge them.
static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
QualType other, bool isBlockReturnType) {
// C99 6.7.2.2p4: Each enumerated type shall be compatible with char,
// a signed integer type, or an unsigned integer type.
// Compatibility is based on the underlying type, not the promotion
// type.
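// For illustration: an 'enum E' whose underlying type is 'unsigned int' is
// compatible with 'unsigned int' but not with 'long', even when both have
// the same width (the block-return case below is the one exception).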
QualType underlyingType = ET->getDecl()->getIntegerType();
if (underlyingType.isNull())
return {};
if (Context.hasSameType(underlyingType, other))
return other;
// In block return types, we're more permissive and accept any
// integral type of the same size.
if (isBlockReturnType && other->isIntegerType() &&
Context.getTypeSize(underlyingType) == Context.getTypeSize(other))
return other;
return {};
}
QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
bool OfBlockPointer,
bool Unqualified, bool BlockReturnType) {
+ // For C++ we will not reach this code with reference types (see below),
+ // for OpenMP variant call overloading we might.
+ //
// C++ [expr]: If an expression initially has the type "reference to T", the
// type is adjusted to "T" prior to any further analysis, the expression
// designates the object or function denoted by the reference, and the
// expression is an lvalue unless the reference is an rvalue reference and
// the expression is a function call (possibly inside parentheses).
+ if (LangOpts.OpenMP && LHS->getAs<ReferenceType>() &&
+ RHS->getAs<ReferenceType>() && LHS->getTypeClass() == RHS->getTypeClass())
+ return mergeTypes(LHS->getAs<ReferenceType>()->getPointeeType(),
+ RHS->getAs<ReferenceType>()->getPointeeType(),
+ OfBlockPointer, Unqualified, BlockReturnType);
if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>())
return {};
if (Unqualified) {
LHS = LHS.getUnqualifiedType();
RHS = RHS.getUnqualifiedType();
}
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
// If the qualifiers are different, the types aren't compatible... mostly.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type
// mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace() ||
LQuals.getObjCLifetime() != RQuals.getObjCLifetime() ||
LQuals.hasUnaligned() != RQuals.hasUnaligned())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong && RHSCan->isObjCObjectPointerType()) {
return mergeTypes(LHS, getObjCGCQualType(RHS, Qualifiers::Strong));
}
if (GC_R == Qualifiers::Strong && LHSCan->isObjCObjectPointerType()) {
return mergeTypes(getObjCGCQualType(LHS, Qualifiers::Strong), RHS);
}
return {};
}
// Okay, qualifiers are equal.
Type::TypeClass LHSClass = LHSCan->getTypeClass();
Type::TypeClass RHSClass = RHSCan->getTypeClass();
// We want to consider the two function types to be the same for these
// comparisons, just force one to the other.
if (LHSClass == Type::FunctionProto) LHSClass = Type::FunctionNoProto;
if (RHSClass == Type::FunctionProto) RHSClass = Type::FunctionNoProto;
// Same as above for arrays
if (LHSClass == Type::VariableArray || LHSClass == Type::IncompleteArray)
LHSClass = Type::ConstantArray;
if (RHSClass == Type::VariableArray || RHSClass == Type::IncompleteArray)
RHSClass = Type::ConstantArray;
// ObjCInterfaces are just specialized ObjCObjects.
if (LHSClass == Type::ObjCInterface) LHSClass = Type::ObjCObject;
if (RHSClass == Type::ObjCInterface) RHSClass = Type::ObjCObject;
// Canonicalize ExtVector -> Vector.
if (LHSClass == Type::ExtVector) LHSClass = Type::Vector;
if (RHSClass == Type::ExtVector) RHSClass = Type::Vector;
// If the canonical type classes don't match.
if (LHSClass != RHSClass) {
// Note that we only have special rules for turning block enum
// returns into block int returns, not vice-versa.
if (const auto *ETy = LHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, RHS, false);
}
if (const EnumType* ETy = RHS->getAs<EnumType>()) {
return mergeEnumWithInteger(*this, ETy, LHS, BlockReturnType);
}
// Allow a block pointer type to match an 'id' type.
if (OfBlockPointer && !BlockReturnType) {
if (LHS->isObjCIdType() && RHS->isBlockPointerType())
return LHS;
if (RHS->isObjCIdType() && LHS->isBlockPointerType())
return RHS;
}
return {};
}
// The canonical type classes match.
switch (LHSClass) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#include "clang/AST/TypeNodes.inc"
llvm_unreachable("Non-canonical and dependent types shouldn't get here");
case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::LValueReference:
case Type::RValueReference:
case Type::MemberPointer:
llvm_unreachable("C++ should never be in mergeTypes");
case Type::ObjCInterface:
case Type::IncompleteArray:
case Type::VariableArray:
case Type::FunctionProto:
case Type::ExtVector:
llvm_unreachable("Types are eliminated above");
case Type::Pointer:
{
// Merge two pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<PointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<PointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getPointerType(ResultType);
}
case Type::BlockPointer:
{
// Merge two block pointer types, while trying to preserve typedef info
QualType LHSPointee = LHS->castAs<BlockPointerType>()->getPointeeType();
QualType RHSPointee = RHS->castAs<BlockPointerType>()->getPointeeType();
if (Unqualified) {
LHSPointee = LHSPointee.getUnqualifiedType();
RHSPointee = RHSPointee.getUnqualifiedType();
}
if (getLangOpts().OpenCL) {
Qualifiers LHSPteeQual = LHSPointee.getQualifiers();
Qualifiers RHSPteeQual = RHSPointee.getQualifiers();
// Blocks can't be an expression in a ternary operator (OpenCL v2.0
// 6.12.5), thus the following check is asymmetric.
if (!LHSPteeQual.isAddressSpaceSupersetOf(RHSPteeQual))
return {};
LHSPteeQual.removeAddressSpace();
RHSPteeQual.removeAddressSpace();
LHSPointee =
QualType(LHSPointee.getTypePtr(), LHSPteeQual.getAsOpaqueValue());
RHSPointee =
QualType(RHSPointee.getTypePtr(), RHSPteeQual.getAsOpaqueValue());
}
QualType ResultType = mergeTypes(LHSPointee, RHSPointee, OfBlockPointer,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSPointee) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSPointee) == getCanonicalType(ResultType))
return RHS;
return getBlockPointerType(ResultType);
}
case Type::Atomic:
{
// Merge two pointer types, while trying to preserve typedef info
QualType LHSValue = LHS->castAs<AtomicType>()->getValueType();
QualType RHSValue = RHS->castAs<AtomicType>()->getValueType();
if (Unqualified) {
LHSValue = LHSValue.getUnqualifiedType();
RHSValue = RHSValue.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSValue, RHSValue, false,
Unqualified);
if (ResultType.isNull())
return {};
if (getCanonicalType(LHSValue) == getCanonicalType(ResultType))
return LHS;
if (getCanonicalType(RHSValue) == getCanonicalType(ResultType))
return RHS;
return getAtomicType(ResultType);
}
case Type::ConstantArray:
{
const ConstantArrayType* LCAT = getAsConstantArrayType(LHS);
const ConstantArrayType* RCAT = getAsConstantArrayType(RHS);
if (LCAT && RCAT && RCAT->getSize() != LCAT->getSize())
return {};
QualType LHSElem = getAsArrayType(LHS)->getElementType();
QualType RHSElem = getAsArrayType(RHS)->getElementType();
if (Unqualified) {
LHSElem = LHSElem.getUnqualifiedType();
RHSElem = RHSElem.getUnqualifiedType();
}
QualType ResultType = mergeTypes(LHSElem, RHSElem, false, Unqualified);
if (ResultType.isNull())
return {};
const VariableArrayType* LVAT = getAsVariableArrayType(LHS);
const VariableArrayType* RVAT = getAsVariableArrayType(RHS);
// If either side is a variable array, and both are complete, check whether
// the current dimension is definite.
if (LVAT || RVAT) {
auto SizeFetch = [this](const VariableArrayType* VAT,
const ConstantArrayType* CAT)
-> std::pair<bool,llvm::APInt> {
if (VAT) {
Optional<llvm::APSInt> TheInt;
Expr *E = VAT->getSizeExpr();
if (E && (TheInt = E->getIntegerConstantExpr(*this)))
return std::make_pair(true, *TheInt);
return std::make_pair(false, llvm::APSInt());
}
if (CAT)
return std::make_pair(true, CAT->getSize());
return std::make_pair(false, llvm::APInt());
};
bool HaveLSize, HaveRSize;
llvm::APInt LSize, RSize;
std::tie(HaveLSize, LSize) = SizeFetch(LVAT, LCAT);
std::tie(HaveRSize, RSize) = SizeFetch(RVAT, RCAT);
if (HaveLSize && HaveRSize && !llvm::APInt::isSameValue(LSize, RSize))
return {}; // Definite, but unequal, array dimension
}
if (LCAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RCAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LCAT)
return getConstantArrayType(ResultType, LCAT->getSize(),
LCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (RCAT)
return getConstantArrayType(ResultType, RCAT->getSize(),
RCAT->getSizeExpr(),
ArrayType::ArraySizeModifier(), 0);
if (LVAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType))
return LHS;
if (RVAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType))
return RHS;
if (LVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of LHS, but the type
// has to be different.
return LHS;
}
if (RVAT) {
// FIXME: This isn't correct! But tricky to implement because
// the array's size has to be the size of RHS, but the type
// has to be different.
return RHS;
}
if (getCanonicalType(LHSElem) == getCanonicalType(ResultType)) return LHS;
if (getCanonicalType(RHSElem) == getCanonicalType(ResultType)) return RHS;
return getIncompleteArrayType(ResultType,
ArrayType::ArraySizeModifier(), 0);
}
case Type::FunctionNoProto:
return mergeFunctionTypes(LHS, RHS, OfBlockPointer, Unqualified);
case Type::Record:
case Type::Enum:
return {};
case Type::Builtin:
// Only exactly equal builtin types are compatible, which is tested above.
return {};
case Type::Complex:
// Distinct complex types are incompatible.
return {};
case Type::Vector:
// FIXME: The merged type should be an ExtVector!
if (areCompatVectorTypes(LHSCan->castAs<VectorType>(),
RHSCan->castAs<VectorType>()))
return LHS;
return {};
case Type::ConstantMatrix:
if (areCompatMatrixTypes(LHSCan->castAs<ConstantMatrixType>(),
RHSCan->castAs<ConstantMatrixType>()))
return LHS;
return {};
case Type::ObjCObject: {
// Check if the types are assignment compatible.
// FIXME: This should be type compatibility, e.g. whether
// "LHS x; RHS x;" at global scope is legal.
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectType>(),
RHS->castAs<ObjCObjectType>()))
return LHS;
return {};
}
case Type::ObjCObjectPointer:
if (OfBlockPointer) {
if (canAssignObjCInterfacesInBlockPointer(
LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>(), BlockReturnType))
return LHS;
return {};
}
if (canAssignObjCInterfaces(LHS->castAs<ObjCObjectPointerType>(),
RHS->castAs<ObjCObjectPointerType>()))
return LHS;
return {};
case Type::Pipe:
assert(LHS != RHS &&
"Equivalent pipe types should have already been handled!");
return {};
case Type::ExtInt: {
// Merge two ext-int types, while trying to preserve typedef info.
bool LHSUnsigned = LHS->castAs<ExtIntType>()->isUnsigned();
bool RHSUnsigned = RHS->castAs<ExtIntType>()->isUnsigned();
unsigned LHSBits = LHS->castAs<ExtIntType>()->getNumBits();
unsigned RHSBits = RHS->castAs<ExtIntType>()->getNumBits();
// As with unsigned vs. int, there is no merged type if these don't match.
if (LHSUnsigned != RHSUnsigned)
return {};
if (LHSBits != RHSBits)
return {};
return LHS;
}
}
llvm_unreachable("Invalid Type::Class!");
}
bool ASTContext::mergeExtParameterInfo(
const FunctionProtoType *FirstFnType, const FunctionProtoType *SecondFnType,
bool &CanUseFirst, bool &CanUseSecond,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &NewParamInfos) {
assert(NewParamInfos.empty() && "param info list not empty");
CanUseFirst = CanUseSecond = true;
bool FirstHasInfo = FirstFnType->hasExtParameterInfos();
bool SecondHasInfo = SecondFnType->hasExtParameterInfos();
// Fast path: if the first type doesn't have ext parameter infos,
// we match if and only if the second type also doesn't have them.
if (!FirstHasInfo && !SecondHasInfo)
return true;
bool NeedParamInfo = false;
size_t E = FirstHasInfo ? FirstFnType->getExtParameterInfos().size()
: SecondFnType->getExtParameterInfos().size();
for (size_t I = 0; I < E; ++I) {
FunctionProtoType::ExtParameterInfo FirstParam, SecondParam;
if (FirstHasInfo)
FirstParam = FirstFnType->getExtParameterInfo(I);
if (SecondHasInfo)
SecondParam = SecondFnType->getExtParameterInfo(I);
// Cannot merge unless everything except the noescape flag matches.
if (FirstParam.withIsNoEscape(false) != SecondParam.withIsNoEscape(false))
return false;
bool FirstNoEscape = FirstParam.isNoEscape();
bool SecondNoEscape = SecondParam.isNoEscape();
bool IsNoEscape = FirstNoEscape && SecondNoEscape;
NewParamInfos.push_back(FirstParam.withIsNoEscape(IsNoEscape));
if (NewParamInfos.back().getOpaqueValue())
NeedParamInfo = true;
if (FirstNoEscape != IsNoEscape)
CanUseFirst = false;
if (SecondNoEscape != IsNoEscape)
CanUseSecond = false;
}
if (!NeedParamInfo)
NewParamInfos.clear();
return true;
}
void ASTContext::ResetObjCLayout(const ObjCContainerDecl *CD) {
ObjCLayouts[CD] = nullptr;
}
/// mergeObjCGCQualifiers - This routine merges the ObjC GC attributes of 'LHS'
/// and 'RHS' and returns the merged version, including for function
/// return types.
QualType ASTContext::mergeObjCGCQualifiers(QualType LHS, QualType RHS) {
QualType LHSCan = getCanonicalType(LHS),
RHSCan = getCanonicalType(RHS);
// If two types are identical, they are compatible.
if (LHSCan == RHSCan)
return LHS;
if (RHSCan->isFunctionType()) {
if (!LHSCan->isFunctionType())
return {};
QualType OldReturnType =
cast<FunctionType>(RHSCan.getTypePtr())->getReturnType();
QualType NewReturnType =
cast<FunctionType>(LHSCan.getTypePtr())->getReturnType();
QualType ResReturnType =
mergeObjCGCQualifiers(NewReturnType, OldReturnType);
if (ResReturnType.isNull())
return {};
if (ResReturnType == NewReturnType || ResReturnType == OldReturnType) {
// id foo(); ... __strong id foo(); or: __strong id foo(); ... id foo();
// In either case, use OldReturnType to build the new function type.
const auto *F = LHS->castAs<FunctionType>();
if (const auto *FPT = cast<FunctionProtoType>(F)) {
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.ExtInfo = getFunctionExtInfo(LHS);
QualType ResultType =
getFunctionType(OldReturnType, FPT->getParamTypes(), EPI);
return ResultType;
}
}
return {};
}
// If the qualifiers are different, the types can still be merged.
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
// If any of these qualifiers are different, we have a type mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
LQuals.getAddressSpace() != RQuals.getAddressSpace())
return {};
// Exactly one GC qualifier difference is allowed: __strong is
// okay if the other type has no GC qualifier but is an Objective
// C object pointer (i.e. implicitly strong by default). We fix
// this by pretending that the unqualified type was actually
// qualified __strong.
Qualifiers::GC GC_L = LQuals.getObjCGCAttr();
Qualifiers::GC GC_R = RQuals.getObjCGCAttr();
assert((GC_L != GC_R) && "unequal qualifier sets had only equal elements");
if (GC_L == Qualifiers::Weak || GC_R == Qualifiers::Weak)
return {};
if (GC_L == Qualifiers::Strong)
return LHS;
if (GC_R == Qualifiers::Strong)
return RHS;
return {};
}
if (LHSCan->isObjCObjectPointerType() && RHSCan->isObjCObjectPointerType()) {
QualType LHSBaseQT = LHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType RHSBaseQT = RHS->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType ResQT = mergeObjCGCQualifiers(LHSBaseQT, RHSBaseQT);
if (ResQT == LHSBaseQT)
return LHS;
if (ResQT == RHSBaseQT)
return RHS;
}
return {};
}
//===----------------------------------------------------------------------===//
// Integer Predicates
//===----------------------------------------------------------------------===//
unsigned ASTContext::getIntWidth(QualType T) const {
if (const auto *ET = T->getAs<EnumType>())
T = ET->getDecl()->getIntegerType();
if (T->isBooleanType())
return 1;
if (const auto *EIT = T->getAs<ExtIntType>())
return EIT->getNumBits();
// For builtin types, just use the standard type sizing method
return (unsigned)getTypeSize(T);
}
QualType ASTContext::getCorrespondingUnsignedType(QualType T) const {
assert((T->hasSignedIntegerRepresentation() || T->isSignedFixedPointType()) &&
"Unexpected type");
// Turn <4 x signed int> -> <4 x unsigned int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingUnsignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _ExtInt, return an unsigned _ExtInt with same width.
if (const auto *EITy = T->getAs<ExtIntType>())
return getExtIntType(/*IsUnsigned=*/true, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the general
// integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_S:
case BuiltinType::SChar:
return UnsignedCharTy;
case BuiltinType::Short:
return UnsignedShortTy;
case BuiltinType::Int:
return UnsignedIntTy;
case BuiltinType::Long:
return UnsignedLongTy;
case BuiltinType::LongLong:
return UnsignedLongLongTy;
case BuiltinType::Int128:
return UnsignedInt128Ty;
// wchar_t is special. It is either signed or not, but when it's signed,
// there's no matching "unsigned wchar_t". Therefore we return the unsigned
// version of its underlying type instead.
case BuiltinType::WChar_S:
return getUnsignedWCharType();
case BuiltinType::ShortAccum:
return UnsignedShortAccumTy;
case BuiltinType::Accum:
return UnsignedAccumTy;
case BuiltinType::LongAccum:
return UnsignedLongAccumTy;
case BuiltinType::SatShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::SatAccum:
return SatUnsignedAccumTy;
case BuiltinType::SatLongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return UnsignedShortFractTy;
case BuiltinType::Fract:
return UnsignedFractTy;
case BuiltinType::LongFract:
return UnsignedLongFractTy;
case BuiltinType::SatShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::SatFract:
return SatUnsignedFractTy;
case BuiltinType::SatLongFract:
return SatUnsignedLongFractTy;
default:
llvm_unreachable("Unexpected signed integer or fixed point type");
}
}
QualType ASTContext::getCorrespondingSignedType(QualType T) const {
assert((T->hasUnsignedIntegerRepresentation() ||
T->isUnsignedFixedPointType()) &&
"Unexpected type");
// Turn <4 x unsigned int> -> <4 x signed int>
if (const auto *VTy = T->getAs<VectorType>())
return getVectorType(getCorrespondingSignedType(VTy->getElementType()),
VTy->getNumElements(), VTy->getVectorKind());
// For _ExtInt, return a signed _ExtInt with same width.
if (const auto *EITy = T->getAs<ExtIntType>())
return getExtIntType(/*IsUnsigned=*/false, EITy->getNumBits());
// For enums, get the underlying integer type of the enum, and let the general
// integer type sign-changing code handle it.
if (const auto *ETy = T->getAs<EnumType>())
T = ETy->getDecl()->getIntegerType();
switch (T->castAs<BuiltinType>()->getKind()) {
case BuiltinType::Char_U:
case BuiltinType::UChar:
return SignedCharTy;
case BuiltinType::UShort:
return ShortTy;
case BuiltinType::UInt:
return IntTy;
case BuiltinType::ULong:
return LongTy;
case BuiltinType::ULongLong:
return LongLongTy;
case BuiltinType::UInt128:
return Int128Ty;
// wchar_t is special. It is either unsigned or not, but when it's unsigned,
// there's no matching "signed wchar_t". Therefore we return the signed
// version of its underlying type instead.
case BuiltinType::WChar_U:
return getSignedWCharType();
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
llvm_unreachable("Unexpected unsigned integer or fixed point type");
}
}
ASTMutationListener::~ASTMutationListener() = default;
void ASTMutationListener::DeducedReturnType(const FunctionDecl *FD,
QualType ReturnType) {}
//===----------------------------------------------------------------------===//
// Builtin Type Computation
//===----------------------------------------------------------------------===//
/// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing the
/// pointer over the consumed characters. This returns the resultant type. If
/// AllowTypeModifiers is false, then modifiers like '*' are not parsed, just basic
/// types. This allows "v2i*" to be parsed as a pointer to a v2i instead of
/// a vector of "i*".
///
/// RequiresICE is filled in on return to indicate whether the value is required
/// to be an Integer Constant Expression.
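/// For illustration (see Builtins.def for the full set of descriptors):
/// "v" decodes to 'void', "LLi" to 'long long int', "Ii" to an 'int' that
/// must be an integer constant expression, and "v*" to 'void *'.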
static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
ASTContext::GetBuiltinTypeError &Error,
bool &RequiresICE,
bool AllowTypeModifiers) {
// Modifiers.
int HowLong = 0;
bool Signed = false, Unsigned = false;
RequiresICE = false;
// Read the prefixed modifiers first.
bool Done = false;
#ifndef NDEBUG
bool IsSpecial = false;
#endif
while (!Done) {
switch (*Str++) {
default: Done = true; --Str; break;
case 'I':
RequiresICE = true;
break;
case 'S':
assert(!Unsigned && "Can't use both 'S' and 'U' modifiers!");
assert(!Signed && "Can't use 'S' modifier multiple times!");
Signed = true;
break;
case 'U':
assert(!Signed && "Can't use both 'S' and 'U' modifiers!");
assert(!Unsigned && "Can't use 'U' modifier multiple times!");
Unsigned = true;
break;
case 'L':
assert(!IsSpecial && "Can't use 'L' with 'W', 'N', 'Z' or 'O' modifiers");
assert(HowLong <= 2 && "Can't have LLLL modifier");
++HowLong;
break;
case 'N':
// 'N' behaves like 'L' for all non-LP64 targets and like 'int' otherwise.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getTargetInfo().getLongWidth() == 32)
++HowLong;
break;
case 'W':
// This modifier represents int64 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getInt64Type()) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'Z':
// This modifier represents int32 type.
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'Z' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
switch (Context.getTargetInfo().getIntTypeByWidth(32, true)) {
default:
llvm_unreachable("Unexpected integer type");
case TargetInfo::SignedInt:
HowLong = 0;
break;
case TargetInfo::SignedLong:
HowLong = 1;
break;
case TargetInfo::SignedLongLong:
HowLong = 2;
break;
}
break;
case 'O':
assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
assert(HowLong == 0 && "Can't use both 'L' and 'O' modifiers!");
#ifndef NDEBUG
IsSpecial = true;
#endif
if (Context.getLangOpts().OpenCL)
HowLong = 1;
else
HowLong = 2;
break;
}
}
QualType Type;
// Read the base type.
switch (*Str++) {
default: llvm_unreachable("Unknown builtin type letter!");
case 'x':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'x'!");
Type = Context.Float16Ty;
break;
case 'y':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'y'!");
Type = Context.BFloat16Ty;
break;
case 'v':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'v'!");
Type = Context.VoidTy;
break;
case 'h':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'h'!");
Type = Context.HalfTy;
break;
case 'f':
assert(HowLong == 0 && !Signed && !Unsigned &&
"Bad modifiers used with 'f'!");
Type = Context.FloatTy;
break;
case 'd':
assert(HowLong < 3 && !Signed && !Unsigned &&
"Bad modifiers used with 'd'!");
if (HowLong == 1)
Type = Context.LongDoubleTy;
else if (HowLong == 2)
Type = Context.Float128Ty;
else
Type = Context.DoubleTy;
break;
case 's':
assert(HowLong == 0 && "Bad modifiers used with 's'!");
if (Unsigned)
Type = Context.UnsignedShortTy;
else
Type = Context.ShortTy;
break;
case 'i':
if (HowLong == 3)
Type = Unsigned ? Context.UnsignedInt128Ty : Context.Int128Ty;
else if (HowLong == 2)
Type = Unsigned ? Context.UnsignedLongLongTy : Context.LongLongTy;
else if (HowLong == 1)
Type = Unsigned ? Context.UnsignedLongTy : Context.LongTy;
else
Type = Unsigned ? Context.UnsignedIntTy : Context.IntTy;
break;
case 'c':
assert(HowLong == 0 && "Bad modifiers used with 'c'!");
if (Signed)
Type = Context.SignedCharTy;
else if (Unsigned)
Type = Context.UnsignedCharTy;
else
Type = Context.CharTy;
break;
case 'b': // boolean
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'b'!");
Type = Context.BoolTy;
break;
case 'z': // size_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'z'!");
Type = Context.getSizeType();
break;
case 'w': // wchar_t.
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'w'!");
Type = Context.getWideCharType();
break;
case 'F':
Type = Context.getCFConstantStringType();
break;
case 'G':
Type = Context.getObjCIdType();
break;
case 'H':
Type = Context.getObjCSelType();
break;
case 'M':
Type = Context.getObjCSuperType();
break;
case 'a':
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
break;
case 'A':
// This is a "reference" to a va_list; however, what exactly
// this means depends on how va_list is defined. There are two
// different kinds of va_list: ones passed by value, and ones
// passed by reference. An example of a by-value va_list is
// x86, where va_list is a char*. An example of by-ref va_list
// is x86-64, where va_list is a __va_list_tag[1]. For x86,
// we want this argument to be a char*&; for x86-64, we want
// it to be a __va_list_tag*.
Type = Context.getBuiltinVaListType();
assert(!Type.isNull() && "builtin va list type not initialized!");
if (Type->isArrayType())
Type = Context.getArrayDecayedType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
case 'q': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
Type = Context.getScalableVectorType(ElementType, NumElements);
break;
}
case 'V': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error,
RequiresICE, false);
assert(!RequiresICE && "Can't require vector ICE");
// TODO: No way to make AltiVec vectors in builtins yet.
Type = Context.getVectorType(ElementType, NumElements,
VectorType::GenericVector);
break;
}
case 'E': {
char *End;
unsigned NumElements = strtoul(Str, &End, 10);
assert(End != Str && "Missing vector size");
Str = End;
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
Type = Context.getExtVectorType(ElementType, NumElements);
break;
}
case 'X': {
QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE,
false);
assert(!RequiresICE && "Can't require complex ICE");
Type = Context.getComplexType(ElementType);
break;
}
case 'Y':
Type = Context.getPointerDiffType();
break;
case 'P':
Type = Context.getFILEType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_stdio;
return {};
}
break;
case 'J':
if (Signed)
Type = Context.getsigjmp_bufType();
else
Type = Context.getjmp_bufType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_setjmp;
return {};
}
break;
case 'K':
assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers for 'K'!");
Type = Context.getucontext_tType();
if (Type.isNull()) {
Error = ASTContext::GE_Missing_ucontext;
return {};
}
break;
case 'p':
Type = Context.getProcessIDType();
break;
}
// If there are modifiers and if we're allowed to parse them, go for it.
Done = !AllowTypeModifiers;
while (!Done) {
switch (char c = *Str++) {
default: Done = true; --Str; break;
case '*':
case '&': {
// Both pointers and references can have their pointee types
// qualified with an address space.
char *End;
unsigned AddrSpace = strtoul(Str, &End, 10);
if (End != Str) {
// Note AddrSpace == 0 is not the same as an unspecified address space.
Type = Context.getAddrSpaceQualType(
Type,
Context.getLangASForBuiltinAddressSpace(AddrSpace));
Str = End;
}
if (c == '*')
Type = Context.getPointerType(Type);
else
Type = Context.getLValueReferenceType(Type);
break;
}
// FIXME: There's no way to have a built-in with an rvalue ref arg.
case 'C':
Type = Type.withConst();
break;
case 'D':
Type = Context.getVolatileType(Type);
break;
case 'R':
Type = Type.withRestrict();
break;
}
}
assert((!RequiresICE || Type->isIntegralOrEnumerationType()) &&
"Integer constant 'I' type must be an integer");
return Type;
}
// On some targets such as PowerPC, some of the builtins are defined with custom
// type descriptors for target-dependent types. These descriptors are decoded in
// other functions, but it may be useful to be able to fall back to default
// descriptor decoding to define builtins mixing target-dependent and target-
// independent types. This function allows decoding one type descriptor with
// default decoding.
QualType ASTContext::DecodeTypeStr(const char *&Str, const ASTContext &Context,
GetBuiltinTypeError &Error, bool &RequireICE,
bool AllowTypeModifiers) const {
return DecodeTypeFromStr(Str, Context, Error, RequireICE, AllowTypeModifiers);
}
/// GetBuiltinType - Return the type for the specified builtin.
QualType ASTContext::GetBuiltinType(unsigned Id,
GetBuiltinTypeError &Error,
unsigned *IntegerConstantArgs) const {
const char *TypeStr = BuiltinInfo.getTypeString(Id);
if (TypeStr[0] == '\0') {
Error = GE_Missing_type;
return {};
}
SmallVector<QualType, 8> ArgTypes;
bool RequiresICE = false;
Error = GE_None;
QualType ResType = DecodeTypeFromStr(TypeStr, *this, Error,
RequiresICE, true);
if (Error != GE_None)
return {};
assert(!RequiresICE && "Result of intrinsic cannot be required to be an ICE");
while (TypeStr[0] && TypeStr[0] != '.') {
QualType Ty = DecodeTypeFromStr(TypeStr, *this, Error, RequiresICE, true);
if (Error != GE_None)
return {};
// If this argument is required to be an IntegerConstantExpression and the
// caller cares, fill in the bitmask we return.
if (RequiresICE && IntegerConstantArgs)
*IntegerConstantArgs |= 1 << ArgTypes.size();
// Do array -> pointer decay. The builtin should use the decayed type.
if (Ty->isArrayType())
Ty = getArrayDecayedType(Ty);
ArgTypes.push_back(Ty);
}
if (Id == Builtin::BI__GetExceptionInfo)
return {};
assert((TypeStr[0] != '.' || TypeStr[1] == 0) &&
"'.' should only occur at end of builtin type list!");
bool Variadic = (TypeStr[0] == '.');
FunctionType::ExtInfo EI(getDefaultCallingConvention(
Variadic, /*IsCXXMethod=*/false, /*IsBuiltin=*/true));
if (BuiltinInfo.isNoReturn(Id)) EI = EI.withNoReturn(true);
// We really shouldn't be making a no-proto type here.
if (ArgTypes.empty() && Variadic && !getLangOpts().CPlusPlus)
return getFunctionNoProtoType(ResType, EI);
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = EI;
EPI.Variadic = Variadic;
if (getLangOpts().CPlusPlus && BuiltinInfo.isNoThrow(Id))
EPI.ExceptionSpec.Type =
getLangOpts().CPlusPlus11 ? EST_BasicNoexcept : EST_DynamicNone;
return getFunctionType(ResType, ArgTypes, EPI);
}
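// For illustration (a worked example inferred from the decoder above, not an
// exhaustive specification): a descriptor such as "icC*." -- the shape used by
// printf-style builtins -- decodes as follows: 'i' gives the 'int' result type,
// 'c' gives 'char', the trailing modifiers 'C' and '*' turn that argument into
// 'const char *', and the final '.' marks the prototype as variadic, so the
// resulting type is 'int (const char *, ...)'.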
static GVALinkage basicGVALinkageForFunction(const ASTContext &Context,
const FunctionDecl *FD) {
if (!FD->isExternallyVisible())
return GVA_Internal;
// Non-user-provided functions get emitted as weak definitions with every
// use, no matter whether they've been explicitly instantiated etc.
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
if (!MD->isUserProvided())
return GVA_DiscardableODR;
GVALinkage External;
switch (FD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
case TSK_ExplicitSpecialization:
External = GVA_StrongExternal;
break;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
// C++11 [temp.explicit]p10:
// [ Note: The intent is that an inline function that is the subject of
// an explicit instantiation declaration will still be implicitly
// instantiated when used so that the body can be considered for
// inlining, but that no out-of-line copy of the inline function would be
// generated in the translation unit. -- end note ]
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
External = GVA_DiscardableODR;
break;
}
if (!FD->isInlined())
return External;
if ((!Context.getLangOpts().CPlusPlus &&
!Context.getTargetInfo().getCXXABI().isMicrosoft() &&
!FD->hasAttr<DLLExportAttr>()) ||
FD->hasAttr<GNUInlineAttr>()) {
// FIXME: This doesn't match gcc's behavior for dllexport inline functions.
// GNU or C99 inline semantics. Determine whether this symbol should be
// externally visible.
if (FD->isInlineDefinitionExternallyVisible())
return External;
// C99 inline semantics, where the symbol is not externally visible.
return GVA_AvailableExternally;
}
// Functions specified with extern and inline in -fms-compatibility mode
// forcibly get emitted. While the body of the function cannot be later
// replaced, the function definition cannot be discarded.
if (FD->isMSExternInline())
return GVA_StrongODR;
return GVA_DiscardableODR;
}
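// For illustration (assuming a plain C++ translation unit): a non-template
// 'inline' function that is odr-used takes the TSK_Undeclared path, is inlined,
// and ends up as GVA_DiscardableODR (a weak definition emitted in every TU that
// uses it), while a function covered by an 'extern template' declaration hits
// TSK_ExplicitInstantiationDeclaration and becomes GVA_AvailableExternally,
// i.e. no out-of-line copy is emitted here.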
static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
const Decl *D, GVALinkage L) {
// See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx
// dllexport/dllimport on inline functions.
if (D->hasAttr<DLLImportAttr>()) {
if (L == GVA_DiscardableODR || L == GVA_StrongODR)
return GVA_AvailableExternally;
} else if (D->hasAttr<DLLExportAttr>()) {
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
} else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
// Device-side functions with __global__ attribute must always be
// visible externally so they can be launched from host.
if (D->hasAttr<CUDAGlobalAttr>() &&
(L == GVA_DiscardableODR || L == GVA_Internal))
return GVA_StrongODR;
// Single source offloading languages like CUDA/HIP need to be able to
// access static device variables from host code of the same compilation
// unit. This is done by externalizing the static variable with a name
// shared between the host and device compilations; the name is identical
// within one compilation unit but differs across compilation units.
if (Context.shouldExternalizeStaticVar(D))
return GVA_StrongExternal;
}
return L;
}
/// Adjust the GVALinkage for a declaration based on what an external AST source
/// knows about whether there can be other definitions of this declaration.
static GVALinkage
adjustGVALinkageForExternalDefinitionKind(const ASTContext &Ctx, const Decl *D,
GVALinkage L) {
ExternalASTSource *Source = Ctx.getExternalSource();
if (!Source)
return L;
switch (Source->hasExternalDefinitions(D)) {
case ExternalASTSource::EK_Never:
// Other translation units rely on us to provide the definition.
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
break;
case ExternalASTSource::EK_Always:
return GVA_AvailableExternally;
case ExternalASTSource::EK_ReplyHazy:
break;
}
return L;
}
GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const {
return adjustGVALinkageForExternalDefinitionKind(*this, FD,
adjustGVALinkageForAttributes(*this, FD,
basicGVALinkageForFunction(*this, FD)));
}
static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
const VarDecl *VD) {
if (!VD->isExternallyVisible())
return GVA_Internal;
if (VD->isStaticLocal()) {
const DeclContext *LexicalContext = VD->getParentFunctionOrMethod();
while (LexicalContext && !isa<FunctionDecl>(LexicalContext))
LexicalContext = LexicalContext->getLexicalParent();
// ObjC Blocks can create local variables that don't have a FunctionDecl
// LexicalContext.
if (!LexicalContext)
return GVA_DiscardableODR;
// Otherwise, let the static local variable inherit its linkage from the
// nearest enclosing function.
auto StaticLocalLinkage =
Context.GetGVALinkageForFunction(cast<FunctionDecl>(LexicalContext));
// Itanium ABI 5.2.2: "Each COMDAT group [for a static local variable] must
// be emitted in any object with references to the symbol for the object it
// contains, whether inline or out-of-line."
// Similar behavior is observed with MSVC. An alternative ABI could use
// StrongODR/AvailableExternally to match the function, but none are
// known/supported currently.
if (StaticLocalLinkage == GVA_StrongODR ||
StaticLocalLinkage == GVA_AvailableExternally)
return GVA_DiscardableODR;
return StaticLocalLinkage;
}
// MSVC treats in-class initialized static data members as definitions.
// By giving them non-strong linkage, out-of-line definitions won't
// cause link errors.
if (Context.isMSStaticDataMemberInlineDefinition(VD))
return GVA_DiscardableODR;
// Most non-template variables have strong linkage; inline variables are
// linkonce_odr or (occasionally, for compatibility) weak_odr.
GVALinkage StrongLinkage;
switch (Context.getInlineVariableDefinitionKind(VD)) {
case ASTContext::InlineVariableDefinitionKind::None:
StrongLinkage = GVA_StrongExternal;
break;
case ASTContext::InlineVariableDefinitionKind::Weak:
case ASTContext::InlineVariableDefinitionKind::WeakUnknown:
StrongLinkage = GVA_DiscardableODR;
break;
case ASTContext::InlineVariableDefinitionKind::Strong:
StrongLinkage = GVA_StrongODR;
break;
}
switch (VD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
return StrongLinkage;
case TSK_ExplicitSpecialization:
return Context.getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember()
? GVA_StrongODR
: StrongLinkage;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
case TSK_ExplicitInstantiationDeclaration:
return GVA_AvailableExternally;
case TSK_ImplicitInstantiation:
return GVA_DiscardableODR;
}
llvm_unreachable("Invalid Linkage!");
}
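// For illustration: a C++17 'inline' variable at namespace scope reaches the
// InlineVariableDefinitionKind::Weak case and is classified as
// GVA_DiscardableODR, whereas a variable instantiated via an explicit
// instantiation definition returns GVA_StrongODR from the
// template-specialization switch above.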
GVALinkage ASTContext::GetGVALinkageForVariable(const VarDecl *VD) {
return adjustGVALinkageForExternalDefinitionKind(*this, VD,
adjustGVALinkageForAttributes(*this, VD,
basicGVALinkageForVariable(*this, VD)));
}
bool ASTContext::DeclMustBeEmitted(const Decl *D) {
if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (!VD->isFileVarDecl())
return false;
// Global named register variables (GNU extension) are never emitted.
if (VD->getStorageClass() == SC_Register)
return false;
if (VD->getDescribedVarTemplate() ||
isa<VarTemplatePartialSpecializationDecl>(VD))
return false;
} else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// We never need to emit an uninstantiated function template.
if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
return false;
} else if (isa<PragmaCommentDecl>(D))
return true;
else if (isa<PragmaDetectMismatchDecl>(D))
return true;
else if (isa<OMPRequiresDecl>(D))
return true;
else if (isa<OMPThreadPrivateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPAllocateDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<OMPDeclareReductionDecl>(D) || isa<OMPDeclareMapperDecl>(D))
return !D->getDeclContext()->isDependentContext();
else if (isa<ImportDecl>(D))
return true;
else
return false;
// If this is a member of a class template, we do not need to emit it.
if (D->getDeclContext()->isDependentContext())
return false;
// Weak references don't produce any output by themselves.
if (D->hasAttr<WeakRefAttr>())
return false;
// Aliases and used decls are required.
if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
return true;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Forward declarations aren't required.
if (!FD->doesThisDeclarationHaveABody())
return FD->doesDeclarationForceExternallyVisibleDefinition();
// Constructors and destructors are required.
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
return true;
// The key function for a class is required. This rule only comes
// into play when inline functions can be key functions, though.
if (getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
const CXXRecordDecl *RD = MD->getParent();
if (MD->isOutOfLine() && RD->isDynamicClass()) {
const CXXMethodDecl *KeyFunc = getCurrentKeyFunction(RD);
if (KeyFunc && KeyFunc->getCanonicalDecl() == MD->getCanonicalDecl())
return true;
}
}
}
GVALinkage Linkage = GetGVALinkageForFunction(FD);
// static, static inline, always_inline, and extern inline functions can
// always be deferred. Normal inline functions can be deferred in C99/C++.
// Implicit template instantiations can also be deferred in C++.
return !isDiscardableGVALinkage(Linkage);
}
const auto *VD = cast<VarDecl>(D);
assert(VD->isFileVarDecl() && "Expected file scoped var");
// If the decl is marked as `declare target to`, it should be emitted for the
// host and for the device.
if (LangOpts.OpenMP &&
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return true;
if (VD->isThisDeclarationADefinition() == VarDecl::DeclarationOnly &&
!isMSStaticDataMemberInlineDefinition(VD))
return false;
// Variables that can be needed in other TUs are required.
auto Linkage = GetGVALinkageForVariable(VD);
if (!isDiscardableGVALinkage(Linkage))
return true;
// We never need to emit a variable that is available in another TU.
if (Linkage == GVA_AvailableExternally)
return false;
// Variables that have destruction with side-effects are required.
if (VD->needsDestruction(*this))
return true;
// Variables that have initialization with side-effects are required.
if (VD->getInit() && VD->getInit()->HasSideEffects(*this) &&
// We can get a value-dependent initializer during error recovery.
(VD->getInit()->isValueDependent() || !VD->evaluateValue()))
return true;
// Likewise, variables with tuple-like bindings are required if their
// bindings have side-effects.
if (const auto *DD = dyn_cast<DecompositionDecl>(VD))
for (const auto *BD : DD->bindings())
if (const auto *BindingVD = BD->getHoldingVar())
if (DeclMustBeEmitted(BindingVD))
return true;
return false;
}
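// For illustration: a file-scope 'static const int' with a constant initializer
// is not required to be emitted (internal, discardable linkage and no side
// effects), whereas a static object whose initializer or destructor has side
// effects is kept alive by the checks above even though its linkage is
// discardable.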
void ASTContext::forEachMultiversionedFunctionVersion(
const FunctionDecl *FD,
llvm::function_ref<void(FunctionDecl *)> Pred) const {
assert(FD->isMultiVersion() && "Only valid for multiversioned functions");
llvm::SmallDenseSet<const FunctionDecl*, 4> SeenDecls;
FD = FD->getMostRecentDecl();
// FIXME: The order of traversal here matters and depends on the order of
// lookup results, which happens to be (mostly) oldest-to-newest, but we
// shouldn't rely on that.
for (auto *CurDecl :
FD->getDeclContext()->getRedeclContext()->lookup(FD->getDeclName())) {
FunctionDecl *CurFD = CurDecl->getAsFunction()->getMostRecentDecl();
if (CurFD && hasSameType(CurFD->getType(), FD->getType()) &&
std::end(SeenDecls) == llvm::find(SeenDecls, CurFD)) {
SeenDecls.insert(CurFD);
Pred(CurFD);
}
}
}
CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
bool IsCXXMethod,
bool IsBuiltin) const {
// Pass through to the C++ ABI object
if (IsCXXMethod)
return ABI->getDefaultMethodCallConv(IsVariadic);
// Builtins ignore user-specified default calling convention and remain the
// Target's default calling convention.
if (!IsBuiltin) {
switch (LangOpts.getDefaultCallingConv()) {
case LangOptions::DCC_None:
break;
case LangOptions::DCC_CDecl:
return CC_C;
case LangOptions::DCC_FastCall:
if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
return CC_X86FastCall;
break;
case LangOptions::DCC_StdCall:
if (!IsVariadic)
return CC_X86StdCall;
break;
case LangOptions::DCC_VectorCall:
// __vectorcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86VectorCall;
break;
case LangOptions::DCC_RegCall:
// __regcall cannot be applied to variadic functions.
if (!IsVariadic)
return CC_X86RegCall;
break;
}
}
return Target->getDefaultCallingConv();
}
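// For illustration (assuming a 32-bit x86 target): under
// -fdefault-calling-conv=stdcall a non-variadic free function is given
// CC_X86StdCall by the switch above, a variadic one falls through to the
// target's default convention, and builtins always use the target default
// because IsBuiltin skips the switch entirely.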
bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const {
// Pass through to the C++ ABI object
return ABI->isNearlyEmpty(RD);
}
VTableContextBase *ASTContext::getVTableContext() {
if (!VTContext.get()) {
auto ABI = Target->getCXXABI();
if (ABI.isMicrosoft())
VTContext.reset(new MicrosoftVTableContext(*this));
else {
auto ComponentLayout = getLangOpts().RelativeCXXABIVTables
? ItaniumVTableContext::Relative
: ItaniumVTableContext::Pointer;
VTContext.reset(new ItaniumVTableContext(*this, ComponentLayout));
}
}
return VTContext.get();
}
MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
if (!T)
T = Target;
switch (T->getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(*this, getDiagnostics());
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics());
}
llvm_unreachable("Unsupported ABI");
}
MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) {
assert(T.getCXXABI().getKind() != TargetCXXABI::Microsoft &&
"Device mangle context does not support Microsoft mangling.");
switch (T.getCXXABI().getKind()) {
case TargetCXXABI::AppleARM64:
case TargetCXXABI::Fuchsia:
case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::GenericMIPS:
case TargetCXXABI::iOS:
case TargetCXXABI::WebAssembly:
case TargetCXXABI::WatchOS:
case TargetCXXABI::XL:
return ItaniumMangleContext::create(
*this, getDiagnostics(),
[](ASTContext &, const NamedDecl *ND) -> llvm::Optional<unsigned> {
if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
return RD->getDeviceLambdaManglingNumber();
return llvm::None;
});
case TargetCXXABI::Microsoft:
return MicrosoftMangleContext::create(*this, getDiagnostics());
}
llvm_unreachable("Unsupported ABI");
}
CXXABI::~CXXABI() = default;
size_t ASTContext::getSideTableAllocatedMemory() const {
return ASTRecordLayouts.getMemorySize() +
llvm::capacity_in_bytes(ObjCLayouts) +
llvm::capacity_in_bytes(KeyFunctions) +
llvm::capacity_in_bytes(ObjCImpls) +
llvm::capacity_in_bytes(BlockVarCopyInits) +
llvm::capacity_in_bytes(DeclAttrs) +
llvm::capacity_in_bytes(TemplateOrInstantiation) +
llvm::capacity_in_bytes(InstantiatedFromUsingDecl) +
llvm::capacity_in_bytes(InstantiatedFromUsingShadowDecl) +
llvm::capacity_in_bytes(InstantiatedFromUnnamedFieldDecl) +
llvm::capacity_in_bytes(OverriddenMethods) +
llvm::capacity_in_bytes(Types) +
llvm::capacity_in_bytes(VariableArrayTypes);
}
/// getIntTypeForBitwidth -
/// Returns the integer QualType corresponding to the specified bitwidth and
/// signedness.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getIntTypeForBitwidth(unsigned DestWidth,
unsigned Signed) const {
TargetInfo::IntType Ty = getTargetInfo().getIntTypeByWidth(DestWidth, Signed);
CanQualType QualTy = getFromTargetType(Ty);
if (!QualTy && DestWidth == 128)
return Signed ? Int128Ty : UnsignedInt128Ty;
return QualTy;
}
/// getRealTypeForBitwidth -
/// Returns the floating-point QualType corresponding to the specified bitwidth.
/// Returns an empty type if there is no appropriate target type.
QualType ASTContext::getRealTypeForBitwidth(unsigned DestWidth,
bool ExplicitIEEE) const {
TargetInfo::RealType Ty =
getTargetInfo().getRealTypeByWidth(DestWidth, ExplicitIEEE);
switch (Ty) {
case TargetInfo::Float:
return FloatTy;
case TargetInfo::Double:
return DoubleTy;
case TargetInfo::LongDouble:
return LongDoubleTy;
case TargetInfo::Float128:
return Float128Ty;
case TargetInfo::NoFloat:
return {};
}
llvm_unreachable("Unhandled TargetInfo::RealType value");
}
void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
if (Number > 1)
MangleNumbers[ND] = Number;
}
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
auto I = MangleNumbers.find(ND);
return I != MangleNumbers.end() ? I->second : 1;
}
void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
if (Number > 1)
StaticLocalNumbers[VD] = Number;
}
unsigned ASTContext::getStaticLocalNumber(const VarDecl *VD) const {
auto I = StaticLocalNumbers.find(VD);
return I != StaticLocalNumbers.end() ? I->second : 1;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(const DeclContext *DC) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx = MangleNumberingContexts[DC];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
MangleNumberingContext &
ASTContext::getManglingNumberContext(NeedExtraManglingDecl_t, const Decl *D) {
assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C.
std::unique_ptr<MangleNumberingContext> &MCtx =
ExtraMangleNumberingContexts[D];
if (!MCtx)
MCtx = createMangleNumberingContext();
return *MCtx;
}
std::unique_ptr<MangleNumberingContext>
ASTContext::createMangleNumberingContext() const {
return ABI->createMangleNumberingContext();
}
const CXXConstructorDecl *
ASTContext::getCopyConstructorForExceptionObject(CXXRecordDecl *RD) {
return ABI->getCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()));
}
void ASTContext::addCopyConstructorForExceptionObject(CXXRecordDecl *RD,
CXXConstructorDecl *CD) {
return ABI->addCopyConstructorForExceptionObject(
cast<CXXRecordDecl>(RD->getFirstDecl()),
cast<CXXConstructorDecl>(CD->getFirstDecl()));
}
void ASTContext::addTypedefNameForUnnamedTagDecl(TagDecl *TD,
TypedefNameDecl *DD) {
return ABI->addTypedefNameForUnnamedTagDecl(TD, DD);
}
TypedefNameDecl *
ASTContext::getTypedefNameForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getTypedefNameForUnnamedTagDecl(TD);
}
void ASTContext::addDeclaratorForUnnamedTagDecl(TagDecl *TD,
DeclaratorDecl *DD) {
return ABI->addDeclaratorForUnnamedTagDecl(TD, DD);
}
DeclaratorDecl *ASTContext::getDeclaratorForUnnamedTagDecl(const TagDecl *TD) {
return ABI->getDeclaratorForUnnamedTagDecl(TD);
}
void ASTContext::setParameterIndex(const ParmVarDecl *D, unsigned int index) {
ParamIndices[D] = index;
}
unsigned ASTContext::getParameterIndex(const ParmVarDecl *D) const {
ParameterIndexTable::const_iterator I = ParamIndices.find(D);
assert(I != ParamIndices.end() &&
"ParmIndices lacks entry set by ParmVarDecl");
return I->second;
}
QualType ASTContext::getStringLiteralArrayType(QualType EltTy,
unsigned Length) const {
// A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings)
EltTy = EltTy.withConst();
EltTy = adjustStringLiteralBaseType(EltTy);
// Get an array type for the string, according to C99 6.4.5. This includes
// the null terminator character.
return getConstantArrayType(EltTy, llvm::APInt(32, Length + 1), nullptr,
ArrayType::Normal, /*IndexTypeQuals*/ 0);
}
StringLiteral *
ASTContext::getPredefinedStringLiteralFromCache(StringRef Key) const {
StringLiteral *&Result = StringLiteralCache[Key];
if (!Result)
Result = StringLiteral::Create(
*this, Key, StringLiteral::Ascii,
/*Pascal*/ false, getStringLiteralArrayType(CharTy, Key.size()),
SourceLocation());
return Result;
}
MSGuidDecl *
ASTContext::getMSGuidDecl(MSGuidDecl::Parts Parts) const {
assert(MSGuidTagDecl && "building MS GUID without MS extensions?");
llvm::FoldingSetNodeID ID;
MSGuidDecl::Profile(ID, Parts);
void *InsertPos;
if (MSGuidDecl *Existing = MSGuidDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
QualType GUIDType = getMSGuidType().withConst();
MSGuidDecl *New = MSGuidDecl::Create(*this, GUIDType, Parts);
MSGuidDecls.InsertNode(New, InsertPos);
return New;
}
TemplateParamObjectDecl *
ASTContext::getTemplateParamObjectDecl(QualType T, const APValue &V) const {
assert(T->isRecordType() && "template param object of unexpected type");
// C++ [temp.param]p8:
// [...] a static storage duration object of type 'const T' [...]
T.addConst();
llvm::FoldingSetNodeID ID;
TemplateParamObjectDecl::Profile(ID, T, V);
void *InsertPos;
if (TemplateParamObjectDecl *Existing =
TemplateParamObjectDecls.FindNodeOrInsertPos(ID, InsertPos))
return Existing;
TemplateParamObjectDecl *New = TemplateParamObjectDecl::Create(*this, T, V);
TemplateParamObjectDecls.InsertNode(New, InsertPos);
return New;
}
bool ASTContext::AtomicUsesUnsupportedLibcall(const AtomicExpr *E) const {
const llvm::Triple &T = getTargetInfo().getTriple();
if (!T.isOSDarwin())
return false;
if (!(T.isiOS() && T.isOSVersionLT(7)) &&
!(T.isMacOSX() && T.isOSVersionLT(10, 9)))
return false;
QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
CharUnits sizeChars = getTypeSizeInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
CharUnits alignChars = getTypeAlignInChars(AtomicTy);
unsigned Align = alignChars.getQuantity();
unsigned MaxInlineWidthInBits = getTargetInfo().getMaxAtomicInlineWidth();
return (Size != Align || toBits(sizeChars) > MaxInlineWidthInBits);
}
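// For illustration (assuming a macOS deployment target older than 10.9): an
// _Atomic object whose size is 8 bytes but whose alignment is only 4 bytes
// fails the Size == Align requirement above, so the caller is told that the
// operation would need the external atomic libcalls, which are not available
// on those OS versions.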
bool
ASTContext::ObjCMethodsAreEqual(const ObjCMethodDecl *MethodDecl,
const ObjCMethodDecl *MethodImpl) {
// No point trying to match an unavailable/deprecated method.
if (MethodDecl->hasAttr<UnavailableAttr>()
|| MethodDecl->hasAttr<DeprecatedAttr>())
return false;
if (MethodDecl->getObjCDeclQualifier() !=
MethodImpl->getObjCDeclQualifier())
return false;
if (!hasSameType(MethodDecl->getReturnType(), MethodImpl->getReturnType()))
return false;
if (MethodDecl->param_size() != MethodImpl->param_size())
return false;
for (ObjCMethodDecl::param_const_iterator IM = MethodImpl->param_begin(),
IF = MethodDecl->param_begin(), EM = MethodImpl->param_end(),
EF = MethodDecl->param_end();
IM != EM && IF != EF; ++IM, ++IF) {
const ParmVarDecl *DeclVar = (*IF);
const ParmVarDecl *ImplVar = (*IM);
if (ImplVar->getObjCDeclQualifier() != DeclVar->getObjCDeclQualifier())
return false;
if (!hasSameType(DeclVar->getType(), ImplVar->getType()))
return false;
}
return (MethodDecl->isVariadic() == MethodImpl->isVariadic());
}
uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
LangAS AS;
if (QT->getUnqualifiedDesugaredType()->isNullPtrType())
AS = LangAS::Default;
else
AS = QT->getPointeeType().getAddressSpace();
return getTargetInfo().getNullPointerValue(AS);
}
unsigned ASTContext::getTargetAddressSpace(LangAS AS) const {
if (isTargetAddressSpace(AS))
return toTargetAddressSpace(AS);
else
return (*AddrSpaceMap)[(unsigned)AS];
}
QualType ASTContext::getCorrespondingSaturatedType(QualType Ty) const {
assert(Ty->isFixedPointType());
if (Ty->isSaturatedFixedPointType()) return Ty;
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
return SatShortAccumTy;
case BuiltinType::Accum:
return SatAccumTy;
case BuiltinType::LongAccum:
return SatLongAccumTy;
case BuiltinType::UShortAccum:
return SatUnsignedShortAccumTy;
case BuiltinType::UAccum:
return SatUnsignedAccumTy;
case BuiltinType::ULongAccum:
return SatUnsignedLongAccumTy;
case BuiltinType::ShortFract:
return SatShortFractTy;
case BuiltinType::Fract:
return SatFractTy;
case BuiltinType::LongFract:
return SatLongFractTy;
case BuiltinType::UShortFract:
return SatUnsignedShortFractTy;
case BuiltinType::UFract:
return SatUnsignedFractTy;
case BuiltinType::ULongFract:
return SatUnsignedLongFractTy;
}
}
LangAS ASTContext::getLangASForBuiltinAddressSpace(unsigned AS) const {
if (LangOpts.OpenCL)
return getTargetInfo().getOpenCLBuiltinAddressSpace(AS);
if (LangOpts.CUDA)
return getTargetInfo().getCUDABuiltinAddressSpace(AS);
return getLangASFromTargetAS(AS);
}
// Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
// doesn't include ASTContext.h
template
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::ValueType
clang::LazyGenerationalUpdatePtr<
const Decl *, Decl *, &ExternalASTSource::CompleteRedeclChain>::makeValue(
const clang::ASTContext &Ctx, Decl *Value);
unsigned char ASTContext::getFixedPointScale(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumScale();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumScale();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumScale();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumScale();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumScale();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumScale();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
return Target.getShortFractScale();
case BuiltinType::Fract:
case BuiltinType::SatFract:
return Target.getFractScale();
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
return Target.getLongFractScale();
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
return Target.getUnsignedShortFractScale();
case BuiltinType::UFract:
case BuiltinType::SatUFract:
return Target.getUnsignedFractScale();
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return Target.getUnsignedLongFractScale();
}
}
unsigned char ASTContext::getFixedPointIBits(QualType Ty) const {
assert(Ty->isFixedPointType());
const TargetInfo &Target = getTargetInfo();
switch (Ty->castAs<BuiltinType>()->getKind()) {
default:
llvm_unreachable("Not a fixed point type!");
case BuiltinType::ShortAccum:
case BuiltinType::SatShortAccum:
return Target.getShortAccumIBits();
case BuiltinType::Accum:
case BuiltinType::SatAccum:
return Target.getAccumIBits();
case BuiltinType::LongAccum:
case BuiltinType::SatLongAccum:
return Target.getLongAccumIBits();
case BuiltinType::UShortAccum:
case BuiltinType::SatUShortAccum:
return Target.getUnsignedShortAccumIBits();
case BuiltinType::UAccum:
case BuiltinType::SatUAccum:
return Target.getUnsignedAccumIBits();
case BuiltinType::ULongAccum:
case BuiltinType::SatULongAccum:
return Target.getUnsignedLongAccumIBits();
case BuiltinType::ShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::Fract:
case BuiltinType::SatFract:
case BuiltinType::LongFract:
case BuiltinType::SatLongFract:
case BuiltinType::UShortFract:
case BuiltinType::SatUShortFract:
case BuiltinType::UFract:
case BuiltinType::SatUFract:
case BuiltinType::ULongFract:
case BuiltinType::SatULongFract:
return 0;
}
}
llvm::FixedPointSemantics
ASTContext::getFixedPointSemantics(QualType Ty) const {
assert((Ty->isFixedPointType() || Ty->isIntegerType()) &&
"Can only get the fixed point semantics for a "
"fixed point or integer type.");
if (Ty->isIntegerType())
return llvm::FixedPointSemantics::GetIntegerSemantics(
getIntWidth(Ty), Ty->isSignedIntegerType());
bool isSigned = Ty->isSignedFixedPointType();
return llvm::FixedPointSemantics(
static_cast<unsigned>(getTypeSize(Ty)), getFixedPointScale(Ty), isSigned,
Ty->isSaturatedFixedPointType(),
!isSigned && getTargetInfo().doUnsignedFixedPointTypesHavePadding());
}
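// For illustration (using the default scales in TargetInfo, which individual
// targets may override): 'short _Accum' is typically 16 bits wide with a scale
// of 7, so getFixedPointSemantics() describes it as a signed 16-bit value with
// 7 fractional bits, not saturated, and with no padding bit.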
llvm::APFixedPoint ASTContext::getFixedPointMax(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMax(getFixedPointSemantics(Ty));
}
llvm::APFixedPoint ASTContext::getFixedPointMin(QualType Ty) const {
assert(Ty->isFixedPointType());
return llvm::APFixedPoint::getMin(getFixedPointSemantics(Ty));
}
QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const {
assert(Ty->isUnsignedFixedPointType() &&
"Expected unsigned fixed point type");
switch (Ty->castAs<BuiltinType>()->getKind()) {
case BuiltinType::UShortAccum:
return ShortAccumTy;
case BuiltinType::UAccum:
return AccumTy;
case BuiltinType::ULongAccum:
return LongAccumTy;
case BuiltinType::SatUShortAccum:
return SatShortAccumTy;
case BuiltinType::SatUAccum:
return SatAccumTy;
case BuiltinType::SatULongAccum:
return SatLongAccumTy;
case BuiltinType::UShortFract:
return ShortFractTy;
case BuiltinType::UFract:
return FractTy;
case BuiltinType::ULongFract:
return LongFractTy;
case BuiltinType::SatUShortFract:
return SatShortFractTy;
case BuiltinType::SatUFract:
return SatFractTy;
case BuiltinType::SatULongFract:
return SatLongFractTy;
default:
llvm_unreachable("Unexpected unsigned fixed point type");
}
}
ParsedTargetAttr
ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const {
assert(TD != nullptr);
ParsedTargetAttr ParsedAttr = TD->parse();
ParsedAttr.Features.erase(
llvm::remove_if(ParsedAttr.Features,
[&](const std::string &Feat) {
return !Target->isValidFeatureName(
StringRef{Feat}.substr(1));
}),
ParsedAttr.Features.end());
return ParsedAttr;
}
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
const FunctionDecl *FD) const {
if (FD)
getFunctionFeatureMap(FeatureMap, GlobalDecl().getWithDecl(FD));
else
Target->initFeatureMap(FeatureMap, getDiagnostics(),
Target->getTargetOpts().CPU,
Target->getTargetOpts().Features);
}
// Fills in the supplied string map with the set of target features for the
// passed in function.
void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
GlobalDecl GD) const {
StringRef TargetCPU = Target->getTargetOpts().CPU;
const FunctionDecl *FD = GD.getDecl()->getAsFunction();
if (const auto *TD = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
// Make a copy of the features as passed on the command line into the
// beginning of the additional features from the function to override.
ParsedAttr.Features.insert(
ParsedAttr.Features.begin(),
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
if (ParsedAttr.Architecture != "" &&
Target->isValidCPUName(ParsedAttr.Architecture))
TargetCPU = ParsedAttr.Architecture;
// Now populate the feature map, first with the TargetCPU which is either
// the default or a new one from the target attribute string. Then we'll use
// the passed in features (FeaturesAsWritten) along with the new ones from
// the attribute.
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU,
ParsedAttr.Features);
} else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
llvm::SmallVector<StringRef, 32> FeaturesTmp;
Target->getCPUSpecificCPUDispatchFeatures(
SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
}
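// For illustration (hypothetical attribute string): for a function marked
// __attribute__((target("arch=skylake,avx2"))) on x86, the parsed attribute
// supplies "skylake" as the new TargetCPU and "+avx2" as an extra feature;
// because the command-line features are inserted at the front of the list,
// the attribute's features take precedence when they conflict.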
OMPTraitInfo &ASTContext::getNewOMPTraitInfo() {
OMPTraitInfoVector.emplace_back(new OMPTraitInfo());
return *OMPTraitInfoVector.back();
}
const StreamingDiagnostic &clang::
operator<<(const StreamingDiagnostic &DB,
const ASTContext::SectionInfo &Section) {
if (Section.Decl)
return DB << Section.Decl;
return DB << "a prior #pragma section";
}
bool ASTContext::mayExternalizeStaticVar(const Decl *D) const {
bool IsStaticVar =
isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
// CUDA/HIP: static managed variables need to be externalized since they are
// represented as declarations in IR and therefore cannot have internal linkage.
return IsStaticVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar);
}
bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
return mayExternalizeStaticVar(D) &&
(D->hasAttr<HIPManagedAttr>() ||
CUDADeviceVarODRUsedByHost.count(cast<VarDecl>(D)));
}
StringRef ASTContext::getCUIDHash() const {
if (!CUIDHash.empty())
return CUIDHash;
if (LangOpts.CUID.empty())
return StringRef();
CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
return CUIDHash;
}
// Get the closest named parent, so we can order the SYCL naming decls in a
// context where mangling is meaningful.
static const DeclContext *GetNamedParent(const CXXRecordDecl *RD) {
const DeclContext *DC = RD->getDeclContext();
while (!isa<NamedDecl, TranslationUnitDecl>(DC))
DC = DC->getParent();
return DC;
}
void ASTContext::AddSYCLKernelNamingDecl(const CXXRecordDecl *RD) {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
RD = RD->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
assert(RD->getLocation().isValid() &&
"Invalid location on kernel naming decl");
(void)SYCLKernelNamingTypes[DC].insert(RD);
}
bool ASTContext::IsSYCLKernelNamingDecl(const NamedDecl *ND) const {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
const auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (!RD)
return false;
RD = RD->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
auto Itr = SYCLKernelNamingTypes.find(DC);
if (Itr == SYCLKernelNamingTypes.end())
return false;
return Itr->getSecond().count(RD);
}
// Filters the Decls list to those that share the lambda mangling with the
// passed RD.
void ASTContext::FilterSYCLKernelNamingDecls(
const CXXRecordDecl *RD,
llvm::SmallVectorImpl<const CXXRecordDecl *> &Decls) {
if (!SYCLKernelFilterContext)
SYCLKernelFilterContext.reset(
ItaniumMangleContext::create(*this, getDiagnostics()));
llvm::SmallString<128> LambdaSig;
llvm::raw_svector_ostream Out(LambdaSig);
SYCLKernelFilterContext->mangleLambdaSig(RD, Out);
llvm::erase_if(Decls, [this, &LambdaSig](const CXXRecordDecl *LocalRD) {
llvm::SmallString<128> LocalLambdaSig;
llvm::raw_svector_ostream LocalOut(LocalLambdaSig);
SYCLKernelFilterContext->mangleLambdaSig(LocalRD, LocalOut);
return LambdaSig != LocalLambdaSig;
});
}
unsigned ASTContext::GetSYCLKernelNamingIndex(const NamedDecl *ND) {
assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
assert(IsSYCLKernelNamingDecl(ND) &&
"Lambda not involved in mangling asked for a naming index?");
const CXXRecordDecl *RD = cast<CXXRecordDecl>(ND)->getCanonicalDecl();
const DeclContext *DC = GetNamedParent(RD);
auto Itr = SYCLKernelNamingTypes.find(DC);
assert(Itr != SYCLKernelNamingTypes.end() && "Not a valid DeclContext?");
const llvm::SmallPtrSet<const CXXRecordDecl *, 4> &Set = Itr->getSecond();
llvm::SmallVector<const CXXRecordDecl *> Decls{Set.begin(), Set.end()};
FilterSYCLKernelNamingDecls(RD, Decls);
llvm::sort(Decls, [](const CXXRecordDecl *LHS, const CXXRecordDecl *RHS) {
return LHS->getLambdaManglingNumber() < RHS->getLambdaManglingNumber();
});
return llvm::find(Decls, RD) - Decls.begin();
}
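// For illustration: if three kernel-naming lambdas share the same named parent
// and the same lambda signature, they are ordered by lambda mangling number,
// and the function above returns the position of the requested lambda in that
// order (0 for the smallest mangling number).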
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
index 31cb36d37636..c0cd8fa90ed6 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/M68k.cpp
@@ -1,236 +1,236 @@
//===--- M68k.cpp - Implement M68k target feature support ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements M68k TargetInfo objects.
//
//===----------------------------------------------------------------------===//
#include "M68k.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <cstring>
#include <limits>
namespace clang {
namespace targets {
M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple,
const TargetOptions &)
: TargetInfo(Triple) {
std::string Layout = "";
// M68k is Big Endian
Layout += "E";
// FIXME how to wire it with the used object format?
Layout += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Layout += "-p:32:32";
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs
+ Layout += "-p:32:16:32";
// M68k integer data types
Layout += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
// The registers can hold 8, 16, 32 bits
Layout += "-n8:16:32";
// 16-bit alignment for both stack and aggregates
// in order to conform to the ABI used by GCC
Layout += "-a:0:16-S16";
resetDataLayout(Layout);
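// For reference, concatenating the fragments above yields the data layout
// string "E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32-n8:16:32-a:0:16-S16":
// big-endian, 32-bit pointers with 16-bit ABI alignment, and 16-bit
// stack/aggregate alignment to match GCC.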
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
}
bool M68kTargetInfo::setCPU(const std::string &Name) {
StringRef N = Name;
CPU = llvm::StringSwitch<CPUKind>(N)
.Case("generic", CK_68000)
.Case("M68000", CK_68000)
.Case("M68010", CK_68010)
.Case("M68020", CK_68020)
.Case("M68030", CK_68030)
.Case("M68040", CK_68040)
.Case("M68060", CK_68060)
.Default(CK_Unknown);
return CPU != CK_Unknown;
}
void M68kTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
using llvm::Twine;
Builder.defineMacro("__m68k__");
Builder.defineMacro("mc68000");
Builder.defineMacro("__mc68000");
Builder.defineMacro("__mc68000__");
// For sub-architecture
switch (CPU) {
case CK_68010:
Builder.defineMacro("mc68010");
Builder.defineMacro("__mc68010");
Builder.defineMacro("__mc68010__");
break;
case CK_68020:
Builder.defineMacro("mc68020");
Builder.defineMacro("__mc68020");
Builder.defineMacro("__mc68020__");
break;
case CK_68030:
Builder.defineMacro("mc68030");
Builder.defineMacro("__mc68030");
Builder.defineMacro("__mc68030__");
break;
case CK_68040:
Builder.defineMacro("mc68040");
Builder.defineMacro("__mc68040");
Builder.defineMacro("__mc68040__");
break;
case CK_68060:
Builder.defineMacro("mc68060");
Builder.defineMacro("__mc68060");
Builder.defineMacro("__mc68060__");
break;
default:
break;
}
}
ArrayRef<Builtin::Info> M68kTargetInfo::getTargetBuiltins() const {
// FIXME: Implement.
return None;
}
bool M68kTargetInfo::hasFeature(StringRef Feature) const {
// FIXME elaborate moar
return Feature == "M68000";
}
const char *const M68kTargetInfo::GCCRegNames[] = {
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"pc"};
ArrayRef<const char *> M68kTargetInfo::getGCCRegNames() const {
return llvm::makeArrayRef(GCCRegNames);
}
ArrayRef<TargetInfo::GCCRegAlias> M68kTargetInfo::getGCCRegAliases() const {
// No aliases.
return None;
}
bool M68kTargetInfo::validateAsmConstraint(
const char *&Name, TargetInfo::ConstraintInfo &info) const {
switch (*Name) {
case 'a': // address register
case 'd': // data register
info.setAllowsRegister();
return true;
case 'I': // constant integer in the range [1,8]
info.setRequiresImmediate(1, 8);
return true;
case 'J': // constant signed 16-bit integer
info.setRequiresImmediate(std::numeric_limits<int16_t>::min(),
std::numeric_limits<int16_t>::max());
return true;
case 'K': // constant that is NOT in the range of [-0x80, 0x80)
info.setRequiresImmediate();
return true;
case 'L': // constant integer in the range [-8,-1]
info.setRequiresImmediate(-8, -1);
return true;
case 'M': // constant that is NOT in the range of [-0x100, 0x100]
info.setRequiresImmediate();
return true;
case 'N': // constant integer in the range [24,31]
info.setRequiresImmediate(24, 31);
return true;
case 'O': // constant integer 16
info.setRequiresImmediate(16);
return true;
case 'P': // constant integer in the range [8,15]
info.setRequiresImmediate(8, 15);
return true;
case 'C':
++Name;
switch (*Name) {
case '0': // constant integer 0
info.setRequiresImmediate(0);
return true;
case 'i': // constant integer
case 'j': // integer constant that doesn't fit in 16 bits
info.setRequiresImmediate();
return true;
default:
break;
}
break;
default:
break;
}
return false;
}
llvm::Optional<std::string>
M68kTargetInfo::handleAsmEscapedChar(char EscChar) const {
char C;
switch (EscChar) {
case '.':
case '#':
C = EscChar;
break;
case '/':
C = '%';
break;
case '$':
C = 's';
break;
case '&':
C = 'd';
break;
default:
return llvm::None;
}
return std::string(1, C);
}
std::string M68kTargetInfo::convertConstraint(const char *&Constraint) const {
if (*Constraint == 'C')
// Two-character constraint; add "^" hint for later parsing
return std::string("^") + std::string(Constraint++, 2);
return std::string(1, *Constraint);
}
const char *M68kTargetInfo::getClobbers() const {
// FIXME: Is this really right?
return "";
}
TargetInfo::BuiltinVaListKind M68kTargetInfo::getBuiltinVaListKind() const {
return TargetInfo::VoidPtrBuiltinVaList;
}
} // namespace targets
} // namespace clang
diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
index e24fb5cf082d..3fe39ed64d9c 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h
@@ -1,966 +1,971 @@
//===--- OSTargets.h - Declare OS target feature support --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares OS specific TargetInfo types.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
#include "Targets.h"
namespace clang {
namespace targets {
template <typename TgtInfo>
class LLVM_LIBRARY_VISIBILITY OSTargetInfo : public TgtInfo {
protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const = 0;
public:
OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: TgtInfo(Triple, Opts) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
TgtInfo::getTargetDefines(Opts, Builder);
getOSDefines(Opts, TgtInfo::getTriple(), Builder);
}
};
// CloudABI Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY CloudABITargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__CloudABI__");
Builder.defineMacro("__ELF__");
// CloudABI uses ISO/IEC 10646:2012 for wchar_t, char16_t and char32_t.
Builder.defineMacro("__STDC_ISO_10646__", "201206L");
Builder.defineMacro("__STDC_UTF_16__");
Builder.defineMacro("__STDC_UTF_32__");
}
public:
CloudABITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Ananas target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY AnanasTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Ananas defines
Builder.defineMacro("__Ananas__");
Builder.defineMacro("__ELF__");
}
public:
AnanasTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
const llvm::Triple &Triple, StringRef &PlatformName,
VersionTuple &PlatformMinVersion);
template <typename Target>
class LLVM_LIBRARY_VISIBILITY DarwinTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
getDarwinDefines(Builder, Opts, Triple, this->PlatformName,
this->PlatformMinVersion);
}
public:
DarwinTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
// By default, no TLS, and we list permitted architecture/OS
// combinations.
this->TLSSupported = false;
if (Triple.isMacOSX())
this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7);
else if (Triple.isiOS()) {
// 64-bit iOS supported it from 8 onwards, 32-bit device from 9 onwards,
// 32-bit simulator from 10 onwards.
if (Triple.isArch64Bit())
this->TLSSupported = !Triple.isOSVersionLT(8);
else if (Triple.isArch32Bit()) {
if (!Triple.isSimulatorEnvironment())
this->TLSSupported = !Triple.isOSVersionLT(9);
else
this->TLSSupported = !Triple.isOSVersionLT(10);
}
} else if (Triple.isWatchOS()) {
if (!Triple.isSimulatorEnvironment())
this->TLSSupported = !Triple.isOSVersionLT(2);
else
this->TLSSupported = !Triple.isOSVersionLT(3);
}
this->MCountName = "\01mcount";
}
const char *getStaticInitSectionSpecifier() const override {
// FIXME: We should return 0 when building kexts.
return "__TEXT,__StaticInit,regular,pure_instructions";
}
/// Darwin does not support protected visibility. Darwin's "default"
/// is very similar to ELF's "protected"; Darwin requires a "weak"
/// attribute on declarations that can be dynamically replaced.
bool hasProtectedVisibility() const override { return false; }
unsigned getExnObjectAlignment() const override {
// Older versions of libc++abi guarantee an alignment of only 8 bytes for
// exception objects because of a bug in __cxa_exception that was
// eventually fixed in r319123.
llvm::VersionTuple MinVersion;
const llvm::Triple &T = this->getTriple();
// Compute the earliest OS versions that have the fix to libc++abi.
switch (T.getOS()) {
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX: // Earliest supporting version is 10.14.
MinVersion = llvm::VersionTuple(10U, 14U);
break;
case llvm::Triple::IOS:
case llvm::Triple::TvOS: // Earliest supporting version is 12.0.0.
MinVersion = llvm::VersionTuple(12U);
break;
case llvm::Triple::WatchOS: // Earliest supporting version is 5.0.0.
MinVersion = llvm::VersionTuple(5U);
break;
default:
// Conservatively return 8 bytes if OS is unknown.
return 64;
}
unsigned Major, Minor, Micro;
T.getOSVersion(Major, Minor, Micro);
if (llvm::VersionTuple(Major, Minor, Micro) < MinVersion)
return 64;
return OSTargetInfo<Target>::getExnObjectAlignment();
}
TargetInfo::IntType getLeastIntTypeByWidth(unsigned BitWidth,
bool IsSigned) const final {
// Darwin uses `long long` for `int_least64_t` and `int_fast64_t`.
return BitWidth == 64
? (IsSigned ? TargetInfo::SignedLongLong
: TargetInfo::UnsignedLongLong)
: TargetInfo::getLeastIntTypeByWidth(BitWidth, IsSigned);
}
};
// DragonFlyBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY DragonFlyBSDTargetInfo
: public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// DragonFly defines; list based off of gcc output
Builder.defineMacro("__DragonFly__");
Builder.defineMacro("__DragonFly_cc_version", "100001");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
Builder.defineMacro("__tune_i386__");
DefineStd(Builder, "unix", Opts);
}
public:
DragonFlyBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
}
}
};
#ifndef FREEBSD_CC_VERSION
#define FREEBSD_CC_VERSION 0U
#endif
// FreeBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY FreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// FreeBSD defines; list based off of gcc output
unsigned Release = Triple.getOSMajorVersion();
if (Release == 0U)
Release = 8U;
unsigned CCVersion = FREEBSD_CC_VERSION;
if (CCVersion == 0U)
CCVersion = Release * 100000U + 1U;
Builder.defineMacro("__FreeBSD__", Twine(Release));
Builder.defineMacro("__FreeBSD_cc_version", Twine(CCVersion));
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
// On FreeBSD, wchar_t contains the number of the code point as
// used by the character set of the locale. These character sets are
// not necessarily a superset of ASCII.
//
// FIXME: This is wrong; the macro refers to the numerical values
// of wchar_t *literals*, which are not locale-dependent. However,
// FreeBSD systems apparently depend on us getting this wrong, and
// setting this to 1 is conforming even if all the basic source
// character literals have the same encoding as char and wchar_t.
Builder.defineMacro("__STDC_MB_MIGHT_NEQ_WC__", "1");
}
public:
FreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
this->MCountName = "__mcount";
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
break;
}
}
};
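// For illustration: building for a triple such as x86_64-unknown-freebsd13.0
// gives Release == 13, so the defines above expand to __FreeBSD__=13 and,
// when FREEBSD_CC_VERSION is not overridden, __FreeBSD_cc_version=1300001.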
// GNU/kFreeBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY KFreeBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// GNU/kFreeBSD defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__FreeBSD_kernel__");
Builder.defineMacro("__GLIBC__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
KFreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Haiku Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY HaikuTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Haiku defines; list based off of gcc output
Builder.defineMacro("__HAIKU__");
Builder.defineMacro("__ELF__");
DefineStd(Builder, "unix", Opts);
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
HaikuTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->SizeType = TargetInfo::UnsignedLong;
this->IntPtrType = TargetInfo::SignedLong;
this->PtrDiffType = TargetInfo::SignedLong;
this->ProcessIDType = TargetInfo::SignedLong;
this->TLSSupported = false;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
};
// Hurd target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY HurdTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Hurd defines; list based off of gcc output.
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__GNU__");
Builder.defineMacro("__gnu_hurd__");
Builder.defineMacro("__MACH__");
Builder.defineMacro("__GLIBC__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
HurdTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Minix Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY MinixTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Minix defines
Builder.defineMacro("__minix", "3");
Builder.defineMacro("_EM_WSIZE", "4");
Builder.defineMacro("_EM_PSIZE", "4");
Builder.defineMacro("_EM_SSIZE", "2");
Builder.defineMacro("_EM_LSIZE", "4");
Builder.defineMacro("_EM_FSIZE", "4");
Builder.defineMacro("_EM_DSIZE", "8");
Builder.defineMacro("__ELF__");
DefineStd(Builder, "unix", Opts);
}
public:
MinixTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {}
};
// Linux target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// Linux defines; list based off of gcc output
DefineStd(Builder, "unix", Opts);
DefineStd(Builder, "linux", Opts);
Builder.defineMacro("__ELF__");
if (Triple.isAndroid()) {
Builder.defineMacro("__ANDROID__", "1");
unsigned Maj, Min, Rev;
Triple.getEnvironmentVersion(Maj, Min, Rev);
this->PlatformName = "android";
this->PlatformMinVersion = VersionTuple(Maj, Min, Rev);
if (Maj) {
Builder.defineMacro("__ANDROID_MIN_SDK_VERSION__", Twine(Maj));
// This is a historical but ambiguous name for the minSdkVersion macro. Keep
// it defined for compatibility.
Builder.defineMacro("__ANDROID_API__", "__ANDROID_MIN_SDK_VERSION__");
}
} else {
Builder.defineMacro("__gnu_linux__");
}
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
LinuxTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WIntType = TargetInfo::UnsignedInt;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
const char *getStaticInitSectionSpecifier() const override {
return ".text.startup";
}
};
// NetBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY NetBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// NetBSD defines; list based off of gcc output
Builder.defineMacro("__NetBSD__");
Builder.defineMacro("__unix__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
}
public:
NetBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
}
};
// OpenBSD Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY OpenBSDTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// OpenBSD defines; list based off of gcc output
Builder.defineMacro("__OpenBSD__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
+
+ if (Opts.C11) {
+ Builder.defineMacro("__STDC_NO_ATOMICS__");
+ Builder.defineMacro("__STDC_NO_THREADS__");
+ }
}
public:
OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = this->WIntType = this->SignedInt;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
switch (Triple.getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
LLVM_FALLTHROUGH;
default:
this->MCountName = "__mcount";
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::sparcv9:
this->MCountName = "_mcount";
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
break;
}
}
};
// PSP Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PSPTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// PSP defines; list based on the output of the pspdev gcc toolchain.
Builder.defineMacro("PSP");
Builder.defineMacro("_PSP");
Builder.defineMacro("__psp__");
Builder.defineMacro("__ELF__");
}
public:
PSPTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {}
};
// PS3 PPU Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// PS3 PPU defines.
Builder.defineMacro("__PPC__");
Builder.defineMacro("__PPU__");
Builder.defineMacro("__CELLOS_LV2__");
Builder.defineMacro("__ELF__");
Builder.defineMacro("__LP32__");
Builder.defineMacro("_ARCH_PPC64");
Builder.defineMacro("__powerpc64__");
}
public:
PS3PPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->LongWidth = this->LongAlign = 32;
this->PointerWidth = this->PointerAlign = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->SizeType = TargetInfo::UnsignedInt;
this->resetDataLayout("E-m:e-p:32:32-i64:64-n32:64");
}
};
template <typename Target>
class LLVM_LIBRARY_VISIBILITY PS4OSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__FreeBSD__", "9");
Builder.defineMacro("__FreeBSD_cc_version", "900001");
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__SCE__");
Builder.defineMacro("__ORBIS__");
}
public:
PS4OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedShort;
// On PS4, TLS variables cannot be aligned to more than 32 bytes (256 bits).
this->MaxTLSAlign = 256;
// On PS4, do not honor explicit bit field alignment,
// as in "__attribute__((aligned(2))) int b : 1;".
this->UseExplicitBitFieldAlignment = false;
switch (Triple.getArch()) {
default:
case llvm::Triple::x86_64:
this->MCountName = ".mcount";
this->NewAlign = 256;
break;
}
}
TargetInfo::CallingConvCheckResult
checkCallingConvention(CallingConv CC) const override {
return (CC == CC_C) ? TargetInfo::CCCR_OK : TargetInfo::CCCR_Error;
}
};
// RTEMS Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY RTEMSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// RTEMS defines; list based off of gcc output
Builder.defineMacro("__rtems__");
Builder.defineMacro("__ELF__");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
RTEMSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
// this->MCountName = ".mcount";
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
// this->MCountName = "_mcount";
break;
case llvm::Triple::arm:
// this->MCountName = "__mcount";
break;
}
}
};
// Solaris target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
DefineStd(Builder, "sun", Opts);
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__svr4__");
Builder.defineMacro("__SVR4");
// Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and
// newer, but to 500 for everything else. feature_test.h has a check to
// ensure that you are not using C99 with an old version of X/Open or C89
// with a new version.
if (Opts.C99)
Builder.defineMacro("_XOPEN_SOURCE", "600");
else
Builder.defineMacro("_XOPEN_SOURCE", "500");
if (Opts.CPlusPlus) {
Builder.defineMacro("__C99FEATURES__");
Builder.defineMacro("_FILE_OFFSET_BITS", "64");
}
// GCC restricts the next two to C++.
Builder.defineMacro("_LARGEFILE_SOURCE");
Builder.defineMacro("_LARGEFILE64_SOURCE");
Builder.defineMacro("__EXTENSIONS__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
}
public:
SolarisTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
if (this->PointerWidth == 64) {
this->WCharType = this->WIntType = this->SignedInt;
} else {
this->WCharType = this->WIntType = this->SignedLong;
}
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
this->HasFloat128 = true;
break;
}
}
};
// AIX Target
template <typename Target>
class AIXTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("_IBMR2");
Builder.defineMacro("_POWER");
Builder.defineMacro("_AIX");
Builder.defineMacro("__TOS_AIX__");
if (Opts.C11) {
Builder.defineMacro("__STDC_NO_ATOMICS__");
Builder.defineMacro("__STDC_NO_THREADS__");
}
if (Opts.EnableAIXExtendedAltivecABI)
Builder.defineMacro("__EXTABI__");
unsigned Major, Minor, Micro;
Triple.getOSVersion(Major, Minor, Micro);
// Define AIX OS-Version Macros.
// Includes logic for legacy versions of AIX; no specific intent to support.
std::pair<int, int> OsVersion = {Major, Minor};
if (OsVersion >= std::make_pair(3, 2)) Builder.defineMacro("_AIX32");
if (OsVersion >= std::make_pair(4, 1)) Builder.defineMacro("_AIX41");
if (OsVersion >= std::make_pair(4, 3)) Builder.defineMacro("_AIX43");
if (OsVersion >= std::make_pair(5, 0)) Builder.defineMacro("_AIX50");
if (OsVersion >= std::make_pair(5, 1)) Builder.defineMacro("_AIX51");
if (OsVersion >= std::make_pair(5, 2)) Builder.defineMacro("_AIX52");
if (OsVersion >= std::make_pair(5, 3)) Builder.defineMacro("_AIX53");
if (OsVersion >= std::make_pair(6, 1)) Builder.defineMacro("_AIX61");
if (OsVersion >= std::make_pair(7, 1)) Builder.defineMacro("_AIX71");
if (OsVersion >= std::make_pair(7, 2)) Builder.defineMacro("_AIX72");
if (OsVersion >= std::make_pair(7, 3)) Builder.defineMacro("_AIX73");
// FIXME: Do not define _LONG_LONG when -fno-long-long is specified.
Builder.defineMacro("_LONG_LONG");
if (Opts.POSIXThreads) {
Builder.defineMacro("_THREAD_SAFE");
}
if (this->PointerWidth == 64) {
Builder.defineMacro("__64BIT__");
}
// Define _WCHAR_T when it is a fundamental type
// (i.e., for C++ without -fno-wchar).
if (Opts.CPlusPlus && Opts.WChar) {
Builder.defineMacro("_WCHAR_T");
}
}
public:
AIXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->TheCXXABI.set(TargetCXXABI::XL);
if (this->PointerWidth == 64) {
this->WCharType = this->UnsignedInt;
} else {
this->WCharType = this->UnsignedShort;
}
this->UseZeroLengthBitfieldAlignment = true;
}
// AIX sets FLT_EVAL_METHOD to be 1.
unsigned getFloatEvalMethod() const override { return 1; }
bool hasInt128Type() const override { return false; }
bool defaultsToAIXPowerAlignment() const override { return true; }
};
// z/OS target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY ZOSTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// FIXME: _LONG_LONG should not be defined under -std=c89.
Builder.defineMacro("_LONG_LONG");
Builder.defineMacro("_OPEN_DEFAULT");
// _UNIX03_WITHDRAWN is required to build libcxx.
Builder.defineMacro("_UNIX03_WITHDRAWN");
Builder.defineMacro("__370__");
Builder.defineMacro("__BFP__");
// FIXME: __BOOL__ should not be defined under -std=c89.
Builder.defineMacro("__BOOL__");
Builder.defineMacro("__LONGNAME__");
Builder.defineMacro("__MVS__");
Builder.defineMacro("__THW_370__");
Builder.defineMacro("__THW_BIG_ENDIAN__");
Builder.defineMacro("__TOS_390__");
Builder.defineMacro("__TOS_MVS__");
Builder.defineMacro("__XPLINK__");
if (this->PointerWidth == 64)
Builder.defineMacro("__64BIT__");
if (Opts.CPlusPlus) {
Builder.defineMacro("__DLL__");
// _XOPEN_SOURCE=600 is required to build libcxx.
Builder.defineMacro("_XOPEN_SOURCE", "600");
}
if (Opts.GNUMode) {
Builder.defineMacro("_MI_BUILTIN");
Builder.defineMacro("_EXT");
}
if (Opts.CPlusPlus && Opts.WChar) {
// Macro __wchar_t is defined so that the wchar_t data
// type is not declared as a typedef in system headers.
Builder.defineMacro("__wchar_t");
}
this->PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
}
public:
ZOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedInt;
this->MaxAlignedAttribute = 128;
this->UseBitFieldTypeAlignment = false;
this->UseZeroLengthBitfieldAlignment = true;
this->UseLeadingZeroLengthBitfield = false;
this->ZeroLengthBitfieldBoundary = 32;
this->DefaultAlignForAttributeAligned = 128;
}
};
void addWindowsDefines(const llvm::Triple &Triple, const LangOptions &Opts,
MacroBuilder &Builder);
// Windows target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WindowsTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
addWindowsDefines(Triple, Opts, Builder);
}
public:
WindowsTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = TargetInfo::UnsignedShort;
this->WIntType = TargetInfo::UnsignedShort;
}
};
template <typename Target>
class LLVM_LIBRARY_VISIBILITY NaClTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
Builder.defineMacro("__native_client__");
}
public:
NaClTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
this->PointerWidth = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->DoubleAlign = 64;
this->LongDoubleWidth = 64;
this->LongDoubleAlign = 64;
this->LongLongWidth = 64;
this->LongLongAlign = 64;
this->SizeType = TargetInfo::UnsignedInt;
this->PtrDiffType = TargetInfo::SignedInt;
this->IntPtrType = TargetInfo::SignedInt;
// RegParmMax is inherited from the underlying architecture.
this->LongDoubleFormat = &llvm::APFloat::IEEEdouble();
if (Triple.getArch() == llvm::Triple::arm) {
// Handled in ARM's setABI().
} else if (Triple.getArch() == llvm::Triple::x86) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
"i64:64-n8:16:32-S128");
} else if (Triple.getArch() == llvm::Triple::x86_64) {
this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-"
"i64:64-n8:16:32:64-S128");
} else if (Triple.getArch() == llvm::Triple::mipsel) {
// Handled in mips' setDataLayout.
} else {
assert(Triple.getArch() == llvm::Triple::le32);
this->resetDataLayout("e-p:32:32-i64:64");
}
}
};
// Fuchsia Target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
Builder.defineMacro("__Fuchsia__");
Builder.defineMacro("__ELF__");
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
// Required by the libc++ locale support.
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
}
public:
FuchsiaTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
this->TheCXXABI.set(TargetCXXABI::Fuchsia);
}
};
// WebAssembly target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo
: public OSTargetInfo<Target> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
// A common platform macro.
if (Opts.POSIXThreads)
Builder.defineMacro("_REENTRANT");
// Follow g++ convention and predefine _GNU_SOURCE for C++.
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
// Indicate that we have __float128.
Builder.defineMacro("__FLOAT128__");
}
public:
explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
this->TheCXXABI.set(TargetCXXABI::WebAssembly);
this->HasFloat128 = true;
}
};
// WASI target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY WASITargetInfo
: public WebAssemblyOSTargetInfo<Target> {
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const final {
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
Builder.defineMacro("__wasi__");
}
public:
explicit WASITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: WebAssemblyOSTargetInfo<Target>(Triple, Opts) {}
};
// Emscripten target
template <typename Target>
class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo
: public WebAssemblyOSTargetInfo<Target> {
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const final {
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
Builder.defineMacro("__EMSCRIPTEN__");
if (Opts.POSIXThreads)
Builder.defineMacro("__EMSCRIPTEN_PTHREADS__");
}
public:
explicit EmscriptenTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: WebAssemblyOSTargetInfo<Target>(Triple, Opts) {
// Keeping the alignment of long double to 8 bytes even though its size is
// 16 bytes allows emscripten to have an 8-byte-aligned max_align_t which
// in turn gives us an 8-byte-aligned malloc.
// Emscripten's ABI is unstable and we may change this back to 128 to match
// the WebAssembly default in the future.
this->LongDoubleAlign = 64;
}
};
} // namespace targets
} // namespace clang
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
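// A minimal, self-contained sketch of the layering used by the OS target
// classes above: an architecture-level target defines its own macros, and an
// OS wrapper template appends the OS-specific ones on top. MiniBuilder,
// MiniX86Target, MiniOSTarget and MiniOpenBSD are illustrative stand-ins,
// not the real clang::MacroBuilder / clang::targets::OSTargetInfo classes.
#include <iostream>
#include <string>
#include <vector>

struct MiniBuilder {
  std::vector<std::string> Macros;
  void defineMacro(const std::string &Name, const std::string &Value = "1") {
    Macros.push_back("#define " + Name + " " + Value);
  }
};

// Architecture layer: defines only CPU-level macros.
struct MiniX86Target {
  virtual ~MiniX86Target() = default;
  virtual void getTargetDefines(MiniBuilder &B) const {
    B.defineMacro("__x86_64__");
  }
};

// OS layer: runs the architecture defines first, then the OS defines,
// mirroring the getOSDefines() override pattern used above.
template <typename Target> struct MiniOSTarget : Target {
  void getTargetDefines(MiniBuilder &B) const override {
    Target::getTargetDefines(B); // architecture macros
    getOSDefines(B);             // OS-specific macros
  }
  virtual void getOSDefines(MiniBuilder &B) const = 0;
};

struct MiniOpenBSD : MiniOSTarget<MiniX86Target> {
  void getOSDefines(MiniBuilder &B) const override {
    B.defineMacro("__OpenBSD__");
    B.defineMacro("__ELF__");
  }
};

int main() {
  MiniOpenBSD T;
  MiniBuilder B;
  T.getTargetDefines(B);
  for (const std::string &M : B.Macros)
    std::cout << M << '\n'; // prints the x86 macro followed by the OS macros
  return 0;
}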
diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
index 5c323cb6ea23..94a7553e273b 100644
--- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
@@ -1,5578 +1,5577 @@
//===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Driver/Driver.h"
#include "ToolChains/AIX.h"
#include "ToolChains/AMDGPU.h"
#include "ToolChains/AMDGPUOpenMP.h"
#include "ToolChains/AVR.h"
#include "ToolChains/Ananas.h"
#include "ToolChains/BareMetal.h"
#include "ToolChains/Clang.h"
#include "ToolChains/CloudABI.h"
#include "ToolChains/Contiki.h"
#include "ToolChains/CrossWindows.h"
#include "ToolChains/Cuda.h"
#include "ToolChains/Darwin.h"
#include "ToolChains/DragonFly.h"
#include "ToolChains/FreeBSD.h"
#include "ToolChains/Fuchsia.h"
#include "ToolChains/Gnu.h"
#include "ToolChains/HIP.h"
#include "ToolChains/Haiku.h"
#include "ToolChains/Hexagon.h"
#include "ToolChains/Hurd.h"
#include "ToolChains/Lanai.h"
#include "ToolChains/Linux.h"
#include "ToolChains/MSP430.h"
#include "ToolChains/MSVC.h"
#include "ToolChains/MinGW.h"
#include "ToolChains/Minix.h"
#include "ToolChains/MipsLinux.h"
#include "ToolChains/Myriad.h"
#include "ToolChains/NaCl.h"
#include "ToolChains/NetBSD.h"
#include "ToolChains/OpenBSD.h"
#include "ToolChains/PPCLinux.h"
#include "ToolChains/PS4CPU.h"
#include "ToolChains/RISCVToolchain.h"
#include "ToolChains/Solaris.h"
#include "ToolChains/TCE.h"
#include "ToolChains/VEToolchain.h"
#include "ToolChains/WebAssembly.h"
#include "ToolChains/XCore.h"
#include "ToolChains/ZOS.h"
#include "clang/Basic/TargetID.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
#include <utility>
#if LLVM_ON_UNIX
#include <unistd.h> // getpid
#endif
using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
static llvm::Triple getHIPOffloadTargetTriple() {
static const llvm::Triple T("amdgcn-amd-amdhsa");
return T;
}
// static
std::string Driver::GetResourcesPath(StringRef BinaryPath,
StringRef CustomResourceDir) {
// Since the resource directory is embedded in the module hash, it's important
// that all places that need it call this function, so that they get the
// exact same string ("a/../b/" and "b/" get different hashes, for example).
// Dir is bin/ or lib/, depending on where BinaryPath is.
std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath));
SmallString<128> P(Dir);
if (CustomResourceDir != "") {
llvm::sys::path::append(P, CustomResourceDir);
} else {
// On Windows, libclang.dll is in bin/.
// On non-Windows, libclang.so/.dylib is in lib/.
// With a static-library build of libclang, LibClangPath will contain the
// path of the embedding binary, which for LLVM binaries will be in bin/.
// ../lib gets us to lib/ in both cases.
P = llvm::sys::path::parent_path(Dir);
llvm::sys::path::append(P, Twine("lib") + CLANG_LIBDIR_SUFFIX, "clang",
CLANG_VERSION_STRING);
}
return std::string(P.str());
}
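// Standalone illustration (using std::filesystem rather than llvm::sys::path)
// of the "../lib" resource-directory layout described in the comments above.
// The install prefix and version number below are made up for the example.
#include <filesystem>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  fs::path BinaryPath = "/usr/local/llvm/bin/clang"; // hypothetical BinaryPath
  fs::path Dir = BinaryPath.parent_path();           // .../bin
  // ../lib gets us from bin/ (or lib/) to the sibling lib/ directory.
  fs::path ResourceDir = Dir.parent_path() / "lib" / "clang" / "13.0.0";
  std::cout << ResourceDir.string() << '\n'; // /usr/local/llvm/lib/clang/13.0.0
  return 0;
}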
Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple,
DiagnosticsEngine &Diags, std::string Title,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS)
: Diags(Diags), VFS(std::move(VFS)), Mode(GCCMode),
SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), LTOMode(LTOK_None),
ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT),
DriverTitle(Title), CCPrintStatReportFilename(), CCPrintOptionsFilename(),
CCPrintHeadersFilename(), CCLogDiagnosticsFilename(),
CCCPrintBindings(false), CCPrintOptions(false), CCPrintHeaders(false),
CCLogDiagnostics(false), CCGenDiagnostics(false),
CCPrintProcessStats(false), TargetTriple(TargetTriple),
CCCGenericGCCName(""), Saver(Alloc), CheckInputsExist(true),
GenReproducer(false), SuppressMissingInputWarning(false) {
// Provide a sane fallback if no VFS is specified.
if (!this->VFS)
this->VFS = llvm::vfs::getRealFileSystem();
Name = std::string(llvm::sys::path::filename(ClangExecutable));
Dir = std::string(llvm::sys::path::parent_path(ClangExecutable));
InstalledDir = Dir; // Provide a sensible default installed dir.
if ((!SysRoot.empty()) && llvm::sys::path::is_relative(SysRoot)) {
// Prepend InstalledDir if SysRoot is relative
SmallString<128> P(InstalledDir);
llvm::sys::path::append(P, SysRoot);
SysRoot = std::string(P);
}
#if defined(CLANG_CONFIG_FILE_SYSTEM_DIR)
SystemConfigDir = CLANG_CONFIG_FILE_SYSTEM_DIR;
#endif
#if defined(CLANG_CONFIG_FILE_USER_DIR)
UserConfigDir = CLANG_CONFIG_FILE_USER_DIR;
#endif
// Compute the path to the resource directory.
ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR);
}
void Driver::setDriverMode(StringRef Value) {
static const std::string OptName =
getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
if (auto M = llvm::StringSwitch<llvm::Optional<DriverMode>>(Value)
.Case("gcc", GCCMode)
.Case("g++", GXXMode)
.Case("cpp", CPPMode)
.Case("cl", CLMode)
.Case("flang", FlangMode)
.Default(None))
Mode = *M;
else
Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
}
InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
bool IsClCompatMode,
bool &ContainsError) {
llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
ContainsError = false;
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsClCompatMode);
// Make sure that Flang-only options don't pollute the Clang output
// TODO: Make sure that Clang-only options don't pollute Flang output
if (!IsFlangMode())
ExcludedFlagsBitmask |= options::FlangOnlyOption;
unsigned MissingArgIndex, MissingArgCount;
InputArgList Args =
getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
IncludedFlagsBitmask, ExcludedFlagsBitmask);
// Check for missing argument error.
if (MissingArgCount) {
Diag(diag::err_drv_missing_argument)
<< Args.getArgString(MissingArgIndex) << MissingArgCount;
ContainsError |=
Diags.getDiagnosticLevel(diag::err_drv_missing_argument,
SourceLocation()) > DiagnosticsEngine::Warning;
}
// Check for unsupported options.
for (const Arg *A : Args) {
if (A->getOption().hasFlag(options::Unsupported)) {
unsigned DiagID;
auto ArgString = A->getAsString(Args);
std::string Nearest;
if (getOpts().findNearest(
ArgString, Nearest, IncludedFlagsBitmask,
ExcludedFlagsBitmask | options::Unsupported) > 1) {
DiagID = diag::err_drv_unsupported_opt;
Diag(DiagID) << ArgString;
} else {
DiagID = diag::err_drv_unsupported_opt_with_suggestion;
Diag(DiagID) << ArgString << Nearest;
}
ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
DiagnosticsEngine::Warning;
continue;
}
// Warn about -mcpu= without an argument.
if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
Diag(diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
ContainsError |= Diags.getDiagnosticLevel(
diag::warn_drv_empty_joined_argument,
SourceLocation()) > DiagnosticsEngine::Warning;
}
}
for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) {
unsigned DiagID;
auto ArgString = A->getAsString(Args);
std::string Nearest;
if (getOpts().findNearest(
ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) {
DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
: diag::err_drv_unknown_argument;
Diags.Report(DiagID) << ArgString;
} else {
DiagID = IsCLMode()
? diag::warn_drv_unknown_argument_clang_cl_with_suggestion
: diag::err_drv_unknown_argument_with_suggestion;
Diags.Report(DiagID) << ArgString << Nearest;
}
ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
DiagnosticsEngine::Warning;
}
return Args;
}
// Determine which compilation mode we are in. We look for options which
// affect the phase, starting with the earliest phases, and record which
// option we used to determine the final phase.
phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
Arg **FinalPhaseArg) const {
Arg *PhaseArg = nullptr;
phases::ID FinalPhase;
// -{E,EP,P,M,MM} only run the preprocessor.
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
} else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) {
FinalPhase = phases::Precompile;
// -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||
(PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) ||
(PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) ||
(PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
(PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
(PhaseArg = DAL.getLastArg(options::OPT__analyze)) ||
(PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
FinalPhase = phases::Compile;
// -S only runs up to the backend.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
FinalPhase = phases::Backend;
// -c compilation only runs up to the assembler.
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
// Otherwise do everything.
} else
FinalPhase = phases::Link;
if (FinalPhaseArg)
*FinalPhaseArg = PhaseArg;
return FinalPhase;
}
static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts,
StringRef Value, bool Claim = true) {
Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value,
Args.getBaseArgs().MakeIndex(Value), Value.data());
Args.AddSynthesizedArg(A);
if (Claim)
A->claim();
return A;
}
DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
const llvm::opt::OptTable &Opts = getOpts();
DerivedArgList *DAL = new DerivedArgList(Args);
bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
bool HasNostdlibxx = Args.hasArg(options::OPT_nostdlibxx);
bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
for (Arg *A : Args) {
// Unfortunately, we have to parse some forwarding options (-Xassembler,
// -Xlinker, -Xpreprocessor) because we either integrate their functionality
// (assembler and preprocessor), or bypass a previous driver ('collect2').
// Rewrite linker options, to replace --no-demangle with a custom internal
// option.
if ((A->getOption().matches(options::OPT_Wl_COMMA) ||
A->getOption().matches(options::OPT_Xlinker)) &&
A->containsValue("--no-demangle")) {
// Add the rewritten no-demangle argument.
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_Xlinker__no_demangle));
// Add the remaining values as Xlinker arguments.
for (StringRef Val : A->getValues())
if (Val != "--no-demangle")
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_Xlinker), Val);
continue;
}
// Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by
// some build systems. We don't try to be complete here because we don't
// care to encourage this usage model.
if (A->getOption().matches(options::OPT_Wp_COMMA) &&
(A->getValue(0) == StringRef("-MD") ||
A->getValue(0) == StringRef("-MMD"))) {
// Rewrite to -MD/-MMD along with -MF.
if (A->getValue(0) == StringRef("-MD"))
DAL->AddFlagArg(A, Opts.getOption(options::OPT_MD));
else
DAL->AddFlagArg(A, Opts.getOption(options::OPT_MMD));
if (A->getNumValues() == 2)
DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue(1));
continue;
}
// Rewrite reserved library names.
if (A->getOption().matches(options::OPT_l)) {
StringRef Value = A->getValue();
// Rewrite unless -nostdlib is present.
if (!HasNostdlib && !HasNodefaultlib && !HasNostdlibxx &&
Value == "stdc++") {
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_stdcxx));
continue;
}
// Rewrite unconditionally.
if (Value == "cc_kext") {
DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_cckext));
continue;
}
}
// Pick up inputs via the -- option.
if (A->getOption().matches(options::OPT__DASH_DASH)) {
A->claim();
for (StringRef Val : A->getValues())
DAL->append(MakeInputArg(*DAL, Opts, Val, false));
continue;
}
DAL->append(A);
}
// Enforce -static if -miamcu is present.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
DAL->AddFlagArg(0, Opts.getOption(options::OPT_static));
// Add a default value of -mlinker-version= if the host linker version was
// detected (HOST_LINK_VERSION) and the user didn't specify one.
#if defined(HOST_LINK_VERSION)
if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
strlen(HOST_LINK_VERSION) > 0) {
DAL->AddJoinedArg(0, Opts.getOption(options::OPT_mlinker_version_EQ),
HOST_LINK_VERSION);
DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
}
#endif
return DAL;
}
/// Compute target triple from args.
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
static llvm::Triple computeTargetTriple(const Driver &D,
StringRef TargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
if (const Arg *A = Args.getLastArg(options::OPT_target))
TargetTriple = A->getValue();
llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
// GNU/Hurd's triples should have been -hurd-gnu*, but were historically made
// -gnu* only, and we cannot change this, so we have to detect that case as
// being the Hurd OS.
if (TargetTriple.find("-unknown-gnu") != StringRef::npos ||
TargetTriple.find("-pc-gnu") != StringRef::npos)
Target.setOSName("hurd");
// Handle Apple-specific options available here.
if (Target.isOSBinFormatMachO()) {
// If an explicit Darwin arch name is given, that trumps all.
if (!DarwinArchName.empty()) {
tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
return Target;
}
// Handle the Darwin '-arch' flag.
if (Arg *A = Args.getLastArg(options::OPT_arch)) {
StringRef ArchName = A->getValue();
tools::darwin::setTripleTypeForMachOArchName(Target, ArchName);
}
}
// Handle pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'.
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian)) {
if (A->getOption().matches(options::OPT_mlittle_endian)) {
llvm::Triple LE = Target.getLittleEndianArchVariant();
if (LE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(LE);
} else {
llvm::Triple BE = Target.getBigEndianArchVariant();
if (BE.getArch() != llvm::Triple::UnknownArch)
Target = std::move(BE);
}
}
// Skip further flag support on OSes which don't support '-m32' or '-m64'.
if (Target.getArch() == llvm::Triple::tce ||
Target.getOS() == llvm::Triple::Minix)
return Target;
// On AIX, the env OBJECT_MODE may affect the resulting arch variant.
if (Target.isOSAIX()) {
if (Optional<std::string> ObjectModeValue =
llvm::sys::Process::GetEnv("OBJECT_MODE")) {
StringRef ObjectMode = *ObjectModeValue;
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (ObjectMode.equals("64")) {
AT = Target.get64BitArchVariant().getArch();
} else if (ObjectMode.equals("32")) {
AT = Target.get32BitArchVariant().getArch();
} else {
D.Diag(diag::err_drv_invalid_object_mode) << ObjectMode;
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
}
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
options::OPT_m32, options::OPT_m16);
if (A) {
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (A->getOption().matches(options::OPT_m64)) {
AT = Target.get64BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
else if (Target.getEnvironment() == llvm::Triple::MuslX32)
Target.setEnvironment(llvm::Triple::Musl);
} else if (A->getOption().matches(options::OPT_mx32) &&
Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
AT = llvm::Triple::x86_64;
if (Target.getEnvironment() == llvm::Triple::Musl)
Target.setEnvironment(llvm::Triple::MuslX32);
else
Target.setEnvironment(llvm::Triple::GNUX32);
} else if (A->getOption().matches(options::OPT_m32)) {
AT = Target.get32BitArchVariant().getArch();
if (Target.getEnvironment() == llvm::Triple::GNUX32)
Target.setEnvironment(llvm::Triple::GNU);
else if (Target.getEnvironment() == llvm::Triple::MuslX32)
Target.setEnvironment(llvm::Triple::Musl);
} else if (A->getOption().matches(options::OPT_m16) &&
Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
AT = llvm::Triple::x86;
Target.setEnvironment(llvm::Triple::CODE16);
}
if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
Target.setArch(AT);
}
// Handle -miamcu flag.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
<< Target.str();
if (A && !A->getOption().matches(options::OPT_m32))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-miamcu" << A->getBaseArg().getAsString(Args);
Target.setArch(llvm::Triple::x86);
Target.setArchName("i586");
Target.setEnvironment(llvm::Triple::UnknownEnvironment);
Target.setEnvironmentName("");
Target.setOS(llvm::Triple::ELFIAMCU);
Target.setVendor(llvm::Triple::UnknownVendor);
Target.setVendorName("intel");
}
// If target is MIPS adjust the target triple
// according to the provided ABI name.
A = Args.getLastArg(options::OPT_mabi_EQ);
if (A && Target.isMIPS()) {
StringRef ABIName = A->getValue();
if (ABIName == "32") {
Target = Target.get32BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNUABI64 ||
Target.getEnvironment() == llvm::Triple::GNUABIN32)
Target.setEnvironment(llvm::Triple::GNU);
} else if (ABIName == "n32") {
Target = Target.get64BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNU ||
Target.getEnvironment() == llvm::Triple::GNUABI64)
Target.setEnvironment(llvm::Triple::GNUABIN32);
} else if (ABIName == "64") {
Target = Target.get64BitArchVariant();
if (Target.getEnvironment() == llvm::Triple::GNU ||
Target.getEnvironment() == llvm::Triple::GNUABIN32)
Target.setEnvironment(llvm::Triple::GNUABI64);
}
}
// If target is RISC-V adjust the target triple according to
// provided architecture name
A = Args.getLastArg(options::OPT_march_EQ);
if (A && Target.isRISCV()) {
StringRef ArchName = A->getValue();
if (ArchName.startswith_insensitive("rv32"))
Target.setArch(llvm::Triple::riscv32);
else if (ArchName.startswith_insensitive("rv64"))
Target.setArch(llvm::Triple::riscv64);
}
return Target;
}
// Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? or -f(no-)?offload-lto(=.*)?
// option occurs last.
static llvm::Optional<driver::LTOKind>
parseLTOMode(Driver &D, const llvm::opt::ArgList &Args, OptSpecifier OptPos,
OptSpecifier OptNeg, OptSpecifier OptEq, bool IsOffload) {
driver::LTOKind LTOMode = LTOK_None;
// Non-offload LTO allows -flto=auto and -flto=jobserver. Offload LTO does
// not support those options.
if (!Args.hasFlag(OptPos, OptEq, OptNeg, false) &&
(IsOffload ||
(!Args.hasFlag(options::OPT_flto_EQ_auto, options::OPT_fno_lto, false) &&
!Args.hasFlag(options::OPT_flto_EQ_jobserver, options::OPT_fno_lto,
false))))
return None;
StringRef LTOName("full");
const Arg *A = Args.getLastArg(OptEq);
if (A)
LTOName = A->getValue();
LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
.Case("full", LTOK_Full)
.Case("thin", LTOK_Thin)
.Default(LTOK_Unknown);
if (LTOMode == LTOK_Unknown) {
assert(A);
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
return None;
}
return LTOMode;
}
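// Minimal sketch of the "last LTO option wins" behavior implemented above;
// parseLTOFlag is a made-up helper that maps the final -flto/-flto=/-fno-lto
// spelling to a mode, standing in for the StringSwitch-based parseLTOMode()
// (the -flto=auto/jobserver special cases are omitted here).
#include <iostream>
#include <string>
#include <vector>

enum class LTOKind { None, Full, Thin, Unknown };

LTOKind parseLTOFlag(const std::vector<std::string> &Args) {
  LTOKind Mode = LTOKind::None;
  for (const std::string &A : Args) {        // later options override earlier
    if (A == "-fno-lto")
      Mode = LTOKind::None;
    else if (A == "-flto" || A == "-flto=full")
      Mode = LTOKind::Full;
    else if (A == "-flto=thin")
      Mode = LTOKind::Thin;
    else if (A.rfind("-flto=", 0) == 0)
      Mode = LTOKind::Unknown;               // unrecognized -flto=<value>
  }
  return Mode;
}

int main() {
  // -flto=thin overrides the earlier -flto, so the result is Thin.
  LTOKind K = parseLTOFlag({"-flto", "-flto=thin"});
  std::cout << (K == LTOKind::Thin ? "thin" : "other") << '\n';
  return 0;
}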
// Parse the LTO options.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
LTOMode = LTOK_None;
if (auto M = parseLTOMode(*this, Args, options::OPT_flto,
options::OPT_fno_lto, options::OPT_flto_EQ,
/*IsOffload=*/false))
LTOMode = M.getValue();
OffloadLTOMode = LTOK_None;
if (auto M = parseLTOMode(*this, Args, options::OPT_foffload_lto,
options::OPT_fno_offload_lto,
options::OPT_foffload_lto_EQ,
/*IsOffload=*/true))
OffloadLTOMode = M.getValue();
}
/// Compute the desired OpenMP runtime from the flags provided.
Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);
const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ);
if (A)
RuntimeName = A->getValue();
auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
.Case("libomp", OMPRT_OMP)
.Case("libgomp", OMPRT_GOMP)
.Case("libiomp5", OMPRT_IOMP5)
.Default(OMPRT_Unknown);
if (RT == OMPRT_Unknown) {
if (A)
Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
else
// FIXME: We could use a nicer diagnostic here.
Diag(diag::err_drv_unsupported_opt) << "-fopenmp";
}
return RT;
}
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {
//
// CUDA/HIP
//
// We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
// or HIP type. However, mixed CUDA/HIP compilation is not supported.
bool IsCuda =
llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isCuda(I.first);
});
bool IsHIP =
llvm::any_of(Inputs,
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isHIP(I.first);
}) ||
C.getInputArgs().hasArg(options::OPT_hip_link);
if (IsCuda && IsHIP) {
Diag(clang::diag::err_drv_mix_cuda_hip);
return;
}
if (IsCuda) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
StringRef DeviceTripleStr;
auto OFK = Action::OFK_Cuda;
DeviceTripleStr =
HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
llvm::Triple CudaTriple(DeviceTripleStr);
// Use the CUDA and host triples as the key into the ToolChains map,
// because the device toolchain we create depends on both.
auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
if (!CudaTC) {
CudaTC = std::make_unique<toolchains::CudaToolChain>(
*this, CudaTriple, *HostTC, C.getInputArgs(), OFK);
}
C.addOffloadDeviceToolChain(CudaTC.get(), OFK);
} else if (IsHIP) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
auto OFK = Action::OFK_HIP;
llvm::Triple HIPTriple = getHIPOffloadTargetTriple();
// Use the HIP and host triples as the key into the ToolChains map,
// because the device toolchain we create depends on both.
auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()];
if (!HIPTC) {
HIPTC = std::make_unique<toolchains::HIPToolChain>(
*this, HIPTriple, *HostTC, C.getInputArgs());
}
C.addOffloadDeviceToolChain(HIPTC.get(), OFK);
}
//
// OpenMP
//
// We need to generate an OpenMP toolchain if the user specified targets with
// the -fopenmp-targets option.
if (Arg *OpenMPTargets =
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
if (OpenMPTargets->getNumValues()) {
// We expect that -fopenmp-targets is always used in conjunction with the
// option -fopenmp specifying a valid runtime with offloading support,
// i.e. libomp or libiomp.
bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag(
options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false);
if (HasValidOpenMPRuntime) {
OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs());
HasValidOpenMPRuntime =
OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5;
}
if (HasValidOpenMPRuntime) {
llvm::StringMap<const char *> FoundNormalizedTriples;
for (const char *Val : OpenMPTargets->getValues()) {
llvm::Triple TT(Val);
std::string NormalizedName = TT.normalize();
// Make sure we don't have a duplicate triple.
auto Duplicate = FoundNormalizedTriples.find(NormalizedName);
if (Duplicate != FoundNormalizedTriples.end()) {
Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
<< Val << Duplicate->second;
continue;
}
// Store the current triple so that we can check for duplicates in the
// following iterations.
FoundNormalizedTriples[NormalizedName] = Val;
// If the specified target is invalid, emit a diagnostic.
if (TT.getArch() == llvm::Triple::UnknownArch)
Diag(clang::diag::err_drv_invalid_omp_target) << Val;
else {
const ToolChain *TC;
// Device toolchains have to be selected differently. They pair host
// and device in their implementation.
if (TT.isNVPTX() || TT.isAMDGCN()) {
const ToolChain *HostTC =
C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "Host toolchain should be always defined.");
auto &DeviceTC =
ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()];
if (!DeviceTC) {
if (TT.isNVPTX())
DeviceTC = std::make_unique<toolchains::CudaToolChain>(
*this, TT, *HostTC, C.getInputArgs(), Action::OFK_OpenMP);
else if (TT.isAMDGCN())
DeviceTC =
std::make_unique<toolchains::AMDGPUOpenMPToolChain>(
*this, TT, *HostTC, C.getInputArgs());
else
assert(DeviceTC && "Device toolchain not defined.");
}
TC = DeviceTC.get();
} else
TC = &getToolChain(C.getInputArgs(), TT);
C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP);
}
}
} else
Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
} else
Diag(clang::diag::warn_drv_empty_joined_argument)
<< OpenMPTargets->getAsString(C.getInputArgs());
}
//
// TODO: Add support for other offloading programming models here.
//
}
/// Searches the given directories for the specified file.
///
/// \param[out] FilePath File path, if the file was found.
/// \param[in] Dirs Directories used for the search.
/// \param[in] FileName Name of the file to search for.
/// \return True if file was found.
///
/// Looks for the file specified by FileName sequentially in the directories
/// specified by Dirs.
///
static bool searchForFile(SmallVectorImpl<char> &FilePath,
ArrayRef<StringRef> Dirs, StringRef FileName) {
SmallString<128> WPath;
for (const StringRef &Dir : Dirs) {
if (Dir.empty())
continue;
WPath.clear();
llvm::sys::path::append(WPath, Dir, FileName);
llvm::sys::path::native(WPath);
if (llvm::sys::fs::is_regular_file(WPath)) {
FilePath = std::move(WPath);
return true;
}
}
return false;
}
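// A standalone equivalent of searchForFile() above, written against
// std::filesystem instead of llvm::sys for illustration; findFirstRegularFile
// is a hypothetical name, not a driver API.
#include <filesystem>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

std::optional<std::filesystem::path>
findFirstRegularFile(const std::vector<std::filesystem::path> &Dirs,
                     const std::string &FileName) {
  for (const auto &Dir : Dirs) {
    if (Dir.empty())
      continue;                                 // skip unset search dirs
    std::filesystem::path Candidate = Dir / FileName;
    std::error_code EC;
    if (std::filesystem::is_regular_file(Candidate, EC))
      return Candidate;                         // first match wins
  }
  return std::nullopt;                          // nothing found
}

int main() {
  std::vector<std::filesystem::path> Dirs = {"", "/etc", "/usr/local/etc"};
  if (auto P = findFirstRegularFile(Dirs, "hosts"))
    std::cout << P->string() << '\n';           // typically /etc/hosts
  return 0;
}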
bool Driver::readConfigFile(StringRef FileName) {
// Try reading the given file.
SmallVector<const char *, 32> NewCfgArgs;
if (!llvm::cl::readConfigFile(FileName, Saver, NewCfgArgs)) {
Diag(diag::err_drv_cannot_read_config_file) << FileName;
return true;
}
// Read options from config file.
llvm::SmallString<128> CfgFileName(FileName);
llvm::sys::path::native(CfgFileName);
ConfigFile = std::string(CfgFileName);
bool ContainErrors;
CfgOptions = std::make_unique<InputArgList>(
ParseArgStrings(NewCfgArgs, IsCLMode(), ContainErrors));
if (ContainErrors) {
CfgOptions.reset();
return true;
}
if (CfgOptions->hasArg(options::OPT_config)) {
CfgOptions.reset();
Diag(diag::err_drv_nested_config_file);
return true;
}
// Claim all arguments that come from a configuration file so that the driver
// does not warn on any that are unused.
for (Arg *A : *CfgOptions)
A->claim();
return false;
}
bool Driver::loadConfigFile() {
std::string CfgFileName;
bool FileSpecifiedExplicitly = false;
// Process options that change search path for config files.
if (CLOptions) {
if (CLOptions->hasArg(options::OPT_config_system_dir_EQ)) {
SmallString<128> CfgDir;
CfgDir.append(
CLOptions->getLastArgValue(options::OPT_config_system_dir_EQ));
if (!CfgDir.empty()) {
if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
SystemConfigDir.clear();
else
SystemConfigDir = std::string(CfgDir.begin(), CfgDir.end());
}
}
if (CLOptions->hasArg(options::OPT_config_user_dir_EQ)) {
SmallString<128> CfgDir;
CfgDir.append(
CLOptions->getLastArgValue(options::OPT_config_user_dir_EQ));
if (!CfgDir.empty()) {
if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
UserConfigDir.clear();
else
UserConfigDir = std::string(CfgDir.begin(), CfgDir.end());
}
}
}
// First try to find config file specified in command line.
if (CLOptions) {
std::vector<std::string> ConfigFiles =
CLOptions->getAllArgValues(options::OPT_config);
if (ConfigFiles.size() > 1) {
if (!std::all_of(ConfigFiles.begin(), ConfigFiles.end(),
[ConfigFiles](const std::string &s) {
return s == ConfigFiles[0];
})) {
Diag(diag::err_drv_duplicate_config);
return true;
}
}
if (!ConfigFiles.empty()) {
CfgFileName = ConfigFiles.front();
assert(!CfgFileName.empty());
// If the argument contains a directory separator, treat it as a path to a
// configuration file.
if (llvm::sys::path::has_parent_path(CfgFileName)) {
SmallString<128> CfgFilePath;
if (llvm::sys::path::is_relative(CfgFileName))
llvm::sys::fs::current_path(CfgFilePath);
llvm::sys::path::append(CfgFilePath, CfgFileName);
if (!llvm::sys::fs::is_regular_file(CfgFilePath)) {
Diag(diag::err_drv_config_file_not_exist) << CfgFilePath;
return true;
}
return readConfigFile(CfgFilePath);
}
FileSpecifiedExplicitly = true;
}
}
// If config file is not specified explicitly, try to deduce configuration
// from executable name. For instance, an executable 'armv7l-clang' will
// search for config file 'armv7l-clang.cfg'.
if (CfgFileName.empty() && !ClangNameParts.TargetPrefix.empty())
CfgFileName = ClangNameParts.TargetPrefix + '-' + ClangNameParts.ModeSuffix;
if (CfgFileName.empty())
return false;
// Determine architecture part of the file name, if it is present.
StringRef CfgFileArch = CfgFileName;
size_t ArchPrefixLen = CfgFileArch.find('-');
if (ArchPrefixLen == StringRef::npos)
ArchPrefixLen = CfgFileArch.size();
llvm::Triple CfgTriple;
CfgFileArch = CfgFileArch.take_front(ArchPrefixLen);
CfgTriple = llvm::Triple(llvm::Triple::normalize(CfgFileArch));
if (CfgTriple.getArch() == llvm::Triple::ArchType::UnknownArch)
ArchPrefixLen = 0;
if (!StringRef(CfgFileName).endswith(".cfg"))
CfgFileName += ".cfg";
// If the config file name starts with an architecture name and command-line
// options redefine the architecture (with options like -m32, -LE, etc.), try
// finding a new config file with that architecture.
SmallString<128> FixedConfigFile;
size_t FixedArchPrefixLen = 0;
if (ArchPrefixLen) {
// Get architecture name from config file name like 'i386.cfg' or
// 'armv7l-clang.cfg'.
// Check if command line options changes effective triple.
llvm::Triple EffectiveTriple = computeTargetTriple(*this,
CfgTriple.getTriple(), *CLOptions);
if (CfgTriple.getArch() != EffectiveTriple.getArch()) {
FixedConfigFile = EffectiveTriple.getArchName();
FixedArchPrefixLen = FixedConfigFile.size();
// Append the rest of original file name so that file name transforms
// like: i386-clang.cfg -> x86_64-clang.cfg.
if (ArchPrefixLen < CfgFileName.size())
FixedConfigFile += CfgFileName.substr(ArchPrefixLen);
}
}
// Prepare list of directories where config file is searched for.
StringRef CfgFileSearchDirs[] = {UserConfigDir, SystemConfigDir, Dir};
// Try to find config file. First try file with corrected architecture.
llvm::SmallString<128> CfgFilePath;
if (!FixedConfigFile.empty()) {
if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
return readConfigFile(CfgFilePath);
// If 'x86_64-clang.cfg' was not found, try 'x86_64.cfg'.
FixedConfigFile.resize(FixedArchPrefixLen);
FixedConfigFile.append(".cfg");
if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
return readConfigFile(CfgFilePath);
}
// Then try original file name.
if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
return readConfigFile(CfgFilePath);
// Finally try removing driver mode part: 'x86_64-clang.cfg' -> 'x86_64.cfg'.
if (!ClangNameParts.ModeSuffix.empty() &&
!ClangNameParts.TargetPrefix.empty()) {
CfgFileName.assign(ClangNameParts.TargetPrefix);
CfgFileName.append(".cfg");
if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
return readConfigFile(CfgFilePath);
}
// Report error but only if config file was specified explicitly, by option
// --config. If it was deduced from executable name, it is not an error.
if (FileSpecifiedExplicitly) {
Diag(diag::err_drv_config_file_not_found) << CfgFileName;
for (const StringRef &SearchDir : CfgFileSearchDirs)
if (!SearchDir.empty())
Diag(diag::note_drv_config_file_searched_in) << SearchDir;
return true;
}
return false;
}
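// Illustrative sketch of the config-file name fallback order implemented in
// loadConfigFile() above. buildConfigCandidates is a made-up helper, not part
// of the driver; it only shows which names are tried, most specific first
// (each name is then looked up in the user, system, and driver directories).
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> buildConfigCandidates(const std::string &ExeArch,
                                               const std::string &EffectiveArch,
                                               const std::string &ModeSuffix) {
  std::vector<std::string> Names;
  if (!EffectiveArch.empty() && EffectiveArch != ExeArch) {
    // Options like -m32/-m64 changed the effective architecture, so the
    // arch-corrected names are preferred.
    Names.push_back(EffectiveArch + "-" + ModeSuffix + ".cfg");
    Names.push_back(EffectiveArch + ".cfg");
  }
  Names.push_back(ExeArch + "-" + ModeSuffix + ".cfg"); // name from executable
  Names.push_back(ExeArch + ".cfg");                    // mode suffix removed
  return Names;
}

int main() {
  // An executable named "i386-clang" invoked with -m64:
  for (const std::string &N : buildConfigCandidates("i386", "x86_64", "clang"))
    std::cout << N << '\n'; // x86_64-clang.cfg, x86_64.cfg, i386-clang.cfg, i386.cfg
  return 0;
}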
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
// FIXME: Handle environment options which affect driver behavior, somewhere
// (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS.
// We look for the driver mode option early, because the mode can affect
// how other options are parsed.
auto DriverMode = getDriverMode(ClangExecutable, ArgList.slice(1));
if (!DriverMode.empty())
setDriverMode(DriverMode);
// FIXME: What are we going to do with -V and -b?
// Arguments specified in command line.
bool ContainsError;
CLOptions = std::make_unique<InputArgList>(
ParseArgStrings(ArgList.slice(1), IsCLMode(), ContainsError));
// Try parsing configuration file.
if (!ContainsError)
ContainsError = loadConfigFile();
bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr);
// All arguments, from both config file and command line.
InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions)
: std::move(*CLOptions));
// The args for config files or /clang: flags belong to different InputArgList
// objects than Args. This copies an Arg from one of those other InputArgLists
// to the ownership of Args.
auto appendOneArg = [&Args](const Arg *Opt, const Arg *BaseArg) {
unsigned Index = Args.MakeIndex(Opt->getSpelling());
Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Args.getArgString(Index),
Index, BaseArg);
Copy->getValues() = Opt->getValues();
if (Opt->isClaimed())
Copy->claim();
Copy->setOwnsValues(Opt->getOwnsValues());
Opt->setOwnsValues(false);
Args.append(Copy);
};
if (HasConfigFile)
for (auto *Opt : *CLOptions) {
if (Opt->getOption().matches(options::OPT_config))
continue;
const Arg *BaseArg = &Opt->getBaseArg();
if (BaseArg == Opt)
BaseArg = nullptr;
appendOneArg(Opt, BaseArg);
}
// In CL mode, look for any pass-through arguments
if (IsCLMode() && !ContainsError) {
SmallVector<const char *, 16> CLModePassThroughArgList;
for (const auto *A : Args.filtered(options::OPT__SLASH_clang)) {
A->claim();
CLModePassThroughArgList.push_back(A->getValue());
}
if (!CLModePassThroughArgList.empty()) {
// Parse any pass through args using default clang processing rather
// than clang-cl processing.
auto CLModePassThroughOptions = std::make_unique<InputArgList>(
ParseArgStrings(CLModePassThroughArgList, false, ContainsError));
if (!ContainsError)
for (auto *Opt : *CLModePassThroughOptions) {
appendOneArg(Opt, nullptr);
}
}
}
// Check for working directory option before accessing any files
if (Arg *WD = Args.getLastArg(options::OPT_working_directory))
if (VFS->setCurrentWorkingDirectory(WD->getValue()))
Diag(diag::err_drv_unable_to_set_working_directory) << WD->getValue();
// FIXME: This stuff needs to go into the Compilation, not the driver.
bool CCCPrintPhases;
// Silence driver warnings if requested
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
// -no-canonical-prefixes is used very early in main.
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
// -f(no-)integrated-cc1 is also used very early in main.
Args.ClaimAllArgs(options::OPT_fintegrated_cc1);
Args.ClaimAllArgs(options::OPT_fno_integrated_cc1);
// Ignore -pipe.
Args.ClaimAllArgs(options::OPT_pipe);
// Extract -ccc args.
//
// FIXME: We need to figure out where this behavior should live. Most of it
// should be outside in the client; the parts that aren't should have proper
// options, either by introducing new ones or by overloading gcc ones like -V
// or -b.
CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
CCCGenericGCCName = A->getValue();
GenReproducer = Args.hasFlag(options::OPT_gen_reproducer,
options::OPT_fno_crash_diagnostics,
!!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"));
// Process -fproc-stat-report options.
if (const Arg *A = Args.getLastArg(options::OPT_fproc_stat_report_EQ)) {
CCPrintProcessStats = true;
CCPrintStatReportFilename = A->getValue();
}
if (Args.hasArg(options::OPT_fproc_stat_report))
CCPrintProcessStats = true;
// FIXME: TargetTriple is used by the target-prefixed calls to as/ld
// and getToolChain is const.
if (IsCLMode()) {
// clang-cl targets MSVC-style Win32.
llvm::Triple T(TargetTriple);
T.setOS(llvm::Triple::Win32);
T.setVendor(llvm::Triple::PC);
T.setEnvironment(llvm::Triple::MSVC);
T.setObjectFormat(llvm::Triple::COFF);
TargetTriple = T.str();
}
if (const Arg *A = Args.getLastArg(options::OPT_target))
TargetTriple = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
Dir = InstalledDir = A->getValue();
for (const Arg *A : Args.filtered(options::OPT_B)) {
A->claim();
PrefixDirs.push_back(A->getValue(0));
}
if (Optional<std::string> CompilerPathValue =
llvm::sys::Process::GetEnv("COMPILER_PATH")) {
StringRef CompilerPath = *CompilerPathValue;
while (!CompilerPath.empty()) {
std::pair<StringRef, StringRef> Split =
CompilerPath.split(llvm::sys::EnvPathSeparator);
PrefixDirs.push_back(std::string(Split.first));
CompilerPath = Split.second;
}
}
if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
SysRoot = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
DyldPrefix = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
ResourceDir = A->getValue();
if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
.Case("cwd", SaveTempsCwd)
.Case("obj", SaveTempsObj)
.Default(SaveTempsCwd);
}
setLTOMode(Args);
// Process -fembed-bitcode= flags.
if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
StringRef Name = A->getValue();
unsigned Model = llvm::StringSwitch<unsigned>(Name)
.Case("off", EmbedNone)
.Case("all", EmbedBitcode)
.Case("bitcode", EmbedBitcode)
.Case("marker", EmbedMarker)
.Default(~0U);
if (Model == ~0U) {
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< Name;
} else
BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
}
std::unique_ptr<llvm::opt::InputArgList> UArgs =
std::make_unique<InputArgList>(std::move(Args));
// Perform the default argument translations.
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
const ToolChain &TC = getToolChain(
*UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs,
ContainsError);
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
// Populate the tool chains for the offloading devices, if any.
CreateOffloadingDeviceToolChains(*C, Inputs);
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
BuildActions(*C, C->getArgs(), Inputs, C->getActions());
if (CCCPrintPhases) {
PrintActions(*C);
return C;
}
BuildJobs(*C);
return C;
}
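// Print the arguments in \p Args as a single line, shell-quoting each one.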
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
for (const auto *A : Args)
A->render(Args, ASL);
for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
OS << ' ';
llvm::sys::printArg(OS, *I, true);
}
OS << '\n';
}
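// Look in the Darwin DiagnosticReports directory for the most recent .crash
// file whose parent process matches this driver invocation and, if one is
// found, copy it to \p ReproCrashFilename. Returns true on success.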
bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
SmallString<128> &CrashDiagDir) {
using namespace llvm::sys;
assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() &&
"Only knows about .crash files on Darwin");
// The .crash file can be found at ~/Library/Logs/DiagnosticReports/
// (or /Library/Logs/DiagnosticReports for root) and has the filename pattern
// clang-<VERSION>_<YYYY-MM-DD-HHMMSS>_<hostname>.crash.
path::home_directory(CrashDiagDir);
if (CrashDiagDir.startswith("/var/root"))
CrashDiagDir = "/";
path::append(CrashDiagDir, "Library/Logs/DiagnosticReports");
int PID =
#if LLVM_ON_UNIX
getpid();
#else
0;
#endif
std::error_code EC;
fs::file_status FileStatus;
TimePoint<> LastAccessTime;
SmallString<128> CrashFilePath;
// Lookup the .crash files and get the one generated by a subprocess spawned
// by this driver invocation.
for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
StringRef FileName = path::filename(File->path());
if (!FileName.startswith(Name))
continue;
if (fs::status(File->path(), FileStatus))
continue;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CrashFile =
llvm::MemoryBuffer::getFile(File->path());
if (!CrashFile)
continue;
// The first line should start with "Process:", otherwise this isn't a real
// .crash file.
StringRef Data = CrashFile.get()->getBuffer();
if (!Data.startswith("Process:"))
continue;
// Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]"
size_t ParentProcPos = Data.find("Parent Process:");
if (ParentProcPos == StringRef::npos)
continue;
size_t LineEnd = Data.find_first_of("\n", ParentProcPos);
if (LineEnd == StringRef::npos)
continue;
StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim();
int OpenBracket = -1, CloseBracket = -1;
for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) {
if (ParentProcess[i] == '[')
OpenBracket = i;
if (ParentProcess[i] == ']')
CloseBracket = i;
}
// Extract the parent process PID from the .crash file and check whether
// it matches this driver invocation pid.
int CrashPID;
if (OpenBracket < 0 || CloseBracket < 0 ||
ParentProcess.slice(OpenBracket + 1, CloseBracket)
.getAsInteger(10, CrashPID) || CrashPID != PID) {
continue;
}
// Found a .crash file matching the driver pid. To avoid getting an older
// and misleading crash file, continue looking for the most recent.
// FIXME: the driver can dispatch multiple cc1 invocations, leading to
// multiple crashes pointing to the same parent process. Since the driver
// does not collect pid information for the dispatched invocation there's
// currently no way to distinguish among them.
const auto FileAccessTime = FileStatus.getLastModificationTime();
if (FileAccessTime > LastAccessTime) {
CrashFilePath.assign(File->path());
LastAccessTime = FileAccessTime;
}
}
// If found, copy it over to the location of other reproducer files.
if (!CrashFilePath.empty()) {
EC = fs::copy_file(CrashFilePath, ReproCrashFilename);
if (EC)
return false;
return true;
}
return false;
}
// When clang crashes, produce diagnostic information including the fully
// preprocessed source file(s). Request that the developer attach the
// diagnostic information to a bug report.
void Driver::generateCompilationDiagnostics(
Compilation &C, const Command &FailingCommand,
StringRef AdditionalInformation, CompilationDiagnosticReport *Report) {
if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
return;
// Don't try to generate diagnostics for link or dsymutil jobs.
if (FailingCommand.getCreator().isLinkJob() ||
FailingCommand.getCreator().isDsymutilJob())
return;
// Print the version of the compiler.
PrintVersion(C, llvm::errs());
// Suppress driver output and emit preprocessor output to temp file.
Mode = CPPMode;
CCGenDiagnostics = true;
// Save the original job command(s).
Command Cmd = FailingCommand;
// Keep track of whether we produce any errors while trying to produce
// preprocessed sources.
DiagnosticErrorTrap Trap(Diags);
// Suppress tool output.
C.initCompilationForDiagnostics();
// Construct the list of inputs.
InputList Inputs;
BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs);
for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) {
bool IgnoreInput = false;
// Ignore input from stdin or any inputs that cannot be preprocessed.
// Check type first as not all linker inputs have a value.
if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
IgnoreInput = true;
} else if (!strcmp(it->second->getValue(), "-")) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"ignoring input from stdin.";
IgnoreInput = true;
}
if (IgnoreInput) {
it = Inputs.erase(it);
ie = Inputs.end();
} else {
++it;
}
}
if (Inputs.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - "
"no preprocessable inputs.";
return;
}
// Don't attempt to generate preprocessed files if multiple -arch options are
// used, unless they're all duplicates.
llvm::StringSet<> ArchNames;
for (const Arg *A : C.getArgs()) {
if (A->getOption().matches(options::OPT_arch)) {
StringRef ArchName = A->getValue();
ArchNames.insert(ArchName);
}
}
if (ArchNames.size() > 1) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s) - cannot generate "
"preprocessed source with multiple -arch options.";
return;
}
// Construct the list of abstract actions to perform for this compilation. On
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(C, TC, Inputs);
else
BuildActions(C, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
// If there were errors building the compilation, quit now.
if (Trap.hasErrorOccurred()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
// Generate preprocessed output.
SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If any of the preprocessing commands failed, clean up and exit.
if (!FailingCommands.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
const ArgStringList &TempFiles = C.getTempFiles();
if (TempFiles.empty()) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating preprocessed source(s).";
return;
}
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n********************\n\n"
"PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n"
"Preprocessed source(s) and associated run script(s) are located at:";
SmallString<128> VFS;
SmallString<128> ReproCrashFilename;
for (const char *TempFile : TempFiles) {
Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
if (Report)
Report->TemporaryFiles.push_back(TempFile);
if (ReproCrashFilename.empty()) {
ReproCrashFilename = TempFile;
llvm::sys::path::replace_extension(ReproCrashFilename, ".crash");
}
if (StringRef(TempFile).endswith(".cache")) {
// In some cases (modules) we'll dump extra data to help with reproducing
// the crash into a directory next to the output.
VFS = llvm::sys::path::filename(TempFile);
llvm::sys::path::append(VFS, "vfs", "vfs.yaml");
}
}
// Assume associated files are based off of the first temporary file.
CrashReportInfo CrashInfo(TempFiles[0], VFS);
llvm::SmallString<128> Script(CrashInfo.Filename);
llvm::sys::path::replace_extension(Script, "sh");
std::error_code EC;
llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::CD_CreateNew,
llvm::sys::fs::FA_Write,
llvm::sys::fs::OF_Text);
if (EC) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Error generating run script: " << Script << " " << EC.message();
} else {
ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
<< "# Driver args: ";
printArgList(ScriptOS, C.getInputArgs());
ScriptOS << "# Original command: ";
Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
if (!AdditionalInformation.empty())
ScriptOS << "\n# Additional information: " << AdditionalInformation
<< "\n";
if (Report)
Report->TemporaryFiles.push_back(std::string(Script.str()));
Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
}
// On darwin, provide information about the .crash diagnostic report.
if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) {
SmallString<128> CrashDiagDir;
if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) {
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< ReproCrashFilename.str();
} else { // Suggest a directory for the user to look for .crash files.
llvm::sys::path::append(CrashDiagDir, Name);
CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "Crash backtrace is located in";
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< CrashDiagDir.str();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "(choose the .crash file that corresponds to your crash)";
}
}
for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file_EQ))
Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue();
Diag(clang::diag::note_drv_command_failed_diag_msg)
<< "\n\n********************";
}
void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
// Since commandLineFitsWithinSystemLimits() may underestimate the system's
// capacity if the tool does not support response files, there is a chance
// that things will just work without a response file, so we silently just
// skip it.
if (Cmd.getResponseFileSupport().ResponseKind ==
ResponseFileSupport::RF_None ||
llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(),
Cmd.getArguments()))
return;
std::string TmpName = GetTemporaryPath("response", "txt");
Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName)));
}
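// Run the jobs in the compilation (or just print them for -###), recording
// any failing commands in \p FailingCommands and diagnosing abnormal
// failures.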
int Driver::ExecuteCompilation(
Compilation &C,
SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
// Just print if -### was present.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
C.getJobs().Print(llvm::errs(), "\n", true);
return 0;
}
// If there were errors building the compilation, quit now.
if (Diags.hasErrorOccurred())
return 1;
// Set up response file names for each command, if necessary
for (auto &Job : C.getJobs())
setUpResponseFiles(C, Job);
C.ExecuteJobs(C.getJobs(), FailingCommands);
// If the command succeeded, we are done.
if (FailingCommands.empty())
return 0;
// Otherwise, remove result files and print extra information about abnormal
// failures.
int Res = 0;
for (const auto &CmdPair : FailingCommands) {
int CommandRes = CmdPair.first;
const Command *FailingCommand = CmdPair.second;
// Remove result files if we're not saving temps.
if (!isSaveTempsEnabled()) {
const JobAction *JA = cast<JobAction>(&FailingCommand->getSource());
C.CleanupFileMap(C.getResultFiles(), JA, true);
// Failure result files are valid unless we crashed.
if (CommandRes < 0)
C.CleanupFileMap(C.getFailureResultFiles(), JA, true);
}
#if LLVM_ON_UNIX
// llvm/lib/Support/Unix/Signals.inc will exit with a special return code
// for SIGPIPE. Do not print diagnostics for this case.
if (CommandRes == EX_IOERR) {
Res = CommandRes;
continue;
}
#endif
// Print extra information about abnormal failures, if possible.
//
// This is ad-hoc, but we don't want to be excessively noisy. If the result
// status was 1, assume the command failed normally. In particular, if it
// was the compiler then assume it gave a reasonable error code. Failures
// in other tools are less common, and they generally have worse
// diagnostics, so always print the diagnostic there.
const Tool &FailingTool = FailingCommand->getCreator();
if (!FailingCommand->getCreator().hasGoodDiagnostics() || CommandRes != 1) {
// FIXME: See FIXME above regarding result code interpretation.
if (CommandRes < 0)
Diag(clang::diag::err_drv_command_signalled)
<< FailingTool.getShortName();
else
Diag(clang::diag::err_drv_command_failed)
<< FailingTool.getShortName() << CommandRes;
}
}
return Res;
}
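// Print the --help listing, filtering options according to the current
// driver mode and whether hidden options were requested.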
void Driver::PrintHelp(bool ShowHidden) const {
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsCLMode());
ExcludedFlagsBitmask |= options::NoDriverOption;
if (!ShowHidden)
ExcludedFlagsBitmask |= HelpHidden;
if (IsFlangMode())
IncludedFlagsBitmask |= options::FlangOption;
else
ExcludedFlagsBitmask |= options::FlangOnlyOption;
std::string Usage = llvm::formatv("{0} [options] file...", Name).str();
getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(),
IncludedFlagsBitmask, ExcludedFlagsBitmask,
/*ShowAllAliases=*/false);
}
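// Print the compiler version, target triple, thread model and install
// directory; used by both --version and -v.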
void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
if (IsFlangMode()) {
OS << getClangToolFullVersion("flang-new") << '\n';
} else {
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
OS << getClangFullVersion() << '\n';
}
const ToolChain &TC = C.getDefaultToolChain();
OS << "Target: " << TC.getTripleString() << '\n';
// Print the threading model.
if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) {
// Don't print if the ToolChain would have barfed on it already
if (TC.isThreadModelSupported(A->getValue()))
OS << "Thread model: " << A->getValue();
} else
OS << "Thread model: " << TC.getThreadModel();
OS << '\n';
// Print out the install directory.
OS << "InstalledDir: " << InstalledDir << '\n';
// If configuration file was used, print its path.
if (!ConfigFile.empty())
OS << "Configuration file: " << ConfigFile << '\n';
}
/// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
/// option.
static void PrintDiagnosticCategories(raw_ostream &OS) {
// Skip the empty category.
for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
++i)
OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
}
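// Implement --autocomplete: print the option or value completions for the
// comma-separated flags passed in by the shell completion script.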
void Driver::HandleAutocompletions(StringRef PassedFlags) const {
if (PassedFlags == "")
return;
// Print out all options that start with a given argument. This is used for
// shell autocompletion.
std::vector<std::string> SuggestedCompletions;
std::vector<std::string> Flags;
unsigned int DisableFlags =
options::NoDriverOption | options::Unsupported | options::Ignored;
// Make sure that Flang-only options don't pollute the Clang output
// TODO: Make sure that Clang-only options don't pollute Flang output
if (!IsFlangMode())
DisableFlags |= options::FlangOnlyOption;
// Distinguish "--autocomplete=-someflag" and "--autocomplete=-someflag,"
// because the latter indicates that the user put a space before pressing
// tab, which should end up in a file completion.
const bool HasSpace = PassedFlags.endswith(",");
// Parse PassedFlags by "," as all the command-line flags are passed to this
// function separated by ","
StringRef TargetFlags = PassedFlags;
while (TargetFlags != "") {
StringRef CurFlag;
std::tie(CurFlag, TargetFlags) = TargetFlags.split(",");
Flags.push_back(std::string(CurFlag));
}
// We want to show cc1-only options only when clang is invoked with -cc1 or
// -Xclang.
if (llvm::is_contained(Flags, "-Xclang") || llvm::is_contained(Flags, "-cc1"))
DisableFlags &= ~options::NoDriverOption;
const llvm::opt::OptTable &Opts = getOpts();
StringRef Cur;
Cur = Flags.at(Flags.size() - 1);
StringRef Prev;
if (Flags.size() >= 2) {
Prev = Flags.at(Flags.size() - 2);
SuggestedCompletions = Opts.suggestValueCompletions(Prev, Cur);
}
if (SuggestedCompletions.empty())
SuggestedCompletions = Opts.suggestValueCompletions(Cur, "");
// If Flags were empty, it means the user typed `clang [tab]` where we should
// list all possible flags. If there was no value completion and the user
// pressed tab after a space, we should fall back to a file completion.
// We're printing a newline to be consistent with what we print at the end of
// this function.
if (SuggestedCompletions.empty() && HasSpace && !Flags.empty()) {
llvm::outs() << '\n';
return;
}
// When flag ends with '=' and there was no value completion, return empty
// string and fall back to the file autocompletion.
if (SuggestedCompletions.empty() && !Cur.endswith("=")) {
// If the flag is in the form of "--autocomplete=-foo",
// we were requested to print out all option names that start with "-foo".
// For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only".
SuggestedCompletions = Opts.findByPrefix(Cur, DisableFlags);
// We have to query the -W flags manually as they're not in the OptTable.
// TODO: Find a good way to add them to OptTable instead and then remove
// this code.
for (StringRef S : DiagnosticIDs::getDiagnosticFlags())
if (S.startswith(Cur))
SuggestedCompletions.push_back(std::string(S));
}
// Sort the autocomplete candidates so that shells print them out in a
// deterministic order. We could sort in any way, but we chose
// case-insensitive sorting for consistency with the -help option,
// which prints out options in case-insensitive alphabetical order.
llvm::sort(SuggestedCompletions, [](StringRef A, StringRef B) {
if (int X = A.compare_insensitive(B))
return X < 0;
return A.compare(B) > 0;
});
llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n';
}
bool Driver::HandleImmediateArgs(const Compilation &C) {
// The order these options are handled in gcc is all over the place, but we
// don't expect inconsistencies w.r.t. that to matter in practice.
if (C.getArgs().hasArg(options::OPT_dumpmachine)) {
llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_dumpversion)) {
// Since -dumpversion is only implemented for pedantic GCC compatibility, we
// return an answer which matches our definition of __VERSION__.
llvm::outs() << CLANG_VERSION_STRING << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) {
PrintDiagnosticCategories(llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_help) ||
C.getArgs().hasArg(options::OPT__help_hidden)) {
PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden));
return false;
}
if (C.getArgs().hasArg(options::OPT__version)) {
// Follow gcc behavior and use stdout for --version and stderr for -v.
PrintVersion(C, llvm::outs());
return false;
}
if (C.getArgs().hasArg(options::OPT_v) ||
C.getArgs().hasArg(options::OPT__HASH_HASH_HASH) ||
C.getArgs().hasArg(options::OPT_print_supported_cpus)) {
PrintVersion(C, llvm::errs());
SuppressMissingInputWarning = true;
}
if (C.getArgs().hasArg(options::OPT_v)) {
if (!SystemConfigDir.empty())
llvm::errs() << "System configuration file directory: "
<< SystemConfigDir << "\n";
if (!UserConfigDir.empty())
llvm::errs() << "User configuration file directory: "
<< UserConfigDir << "\n";
}
const ToolChain &TC = C.getDefaultToolChain();
if (C.getArgs().hasArg(options::OPT_v))
TC.printVerboseInfo(llvm::errs());
if (C.getArgs().hasArg(options::OPT_print_resource_dir)) {
llvm::outs() << ResourceDir << '\n';
return false;
}
if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
llvm::outs() << "programs: =";
bool separator = false;
// Print -B and COMPILER_PATH.
for (const std::string &Path : PrefixDirs) {
if (separator)
llvm::outs() << llvm::sys::EnvPathSeparator;
llvm::outs() << Path;
separator = true;
}
for (const std::string &Path : TC.getProgramPaths()) {
if (separator)
llvm::outs() << llvm::sys::EnvPathSeparator;
llvm::outs() << Path;
separator = true;
}
llvm::outs() << "\n";
llvm::outs() << "libraries: =" << ResourceDir;
StringRef sysroot = C.getSysRoot();
for (const std::string &Path : TC.getFilePaths()) {
// Always print a separator. ResourceDir was the first item shown.
llvm::outs() << llvm::sys::EnvPathSeparator;
// Interpretation of leading '=' is needed only for NetBSD.
if (Path[0] == '=')
llvm::outs() << sysroot << Path.substr(1);
else
llvm::outs() << Path;
}
llvm::outs() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_runtime_dir)) {
std::string CandidateRuntimePath = TC.getRuntimePath();
if (getVFS().exists(CandidateRuntimePath))
llvm::outs() << CandidateRuntimePath << '\n';
else
llvm::outs() << TC.getCompilerRTPath() << '\n';
return false;
}
// FIXME: The following handlers should use a callback mechanism, we don't
// know what the client would like to do.
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) {
llvm::outs() << GetFilePath(A->getValue(), TC) << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
StringRef ProgName = A->getValue();
// Null program name cannot have a path.
if (! ProgName.empty())
llvm::outs() << GetProgramPath(ProgName, TC);
llvm::outs() << "\n";
return false;
}
if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) {
StringRef PassedFlags = A->getValue();
HandleAutocompletions(PassedFlags);
return false;
}
if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
RegisterEffectiveTriple TripleRAII(TC, Triple);
switch (RLT) {
case ToolChain::RLT_CompilerRT:
llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n";
break;
case ToolChain::RLT_Libgcc:
llvm::outs() << GetFilePath("libgcc.a", TC) << "\n";
break;
}
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
for (const Multilib &Multilib : TC.getMultilibs())
llvm::outs() << Multilib << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
const Multilib &Multilib = TC.getMultilib();
if (Multilib.gccSuffix().empty())
llvm::outs() << ".\n";
else {
StringRef Suffix(Multilib.gccSuffix());
assert(Suffix.front() == '/');
llvm::outs() << Suffix.substr(1) << "\n";
}
return false;
}
if (C.getArgs().hasArg(options::OPT_print_target_triple)) {
llvm::outs() << TC.getTripleString() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_effective_triple)) {
const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
llvm::outs() << Triple.getTriple() << "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_multiarch)) {
llvm::outs() << TC.getMultiarchTriple(*this, TC.getTriple(), SysRoot)
<< "\n";
return false;
}
if (C.getArgs().hasArg(options::OPT_print_targets)) {
llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs());
return false;
}
return true;
}
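// Sibling relationship of an action to the one printed before it; used by
// PrintActions1 to pick the tree-drawing prefix for each line.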
enum {
TopLevelAction = 0,
HeadSibAction = 1,
OtherSibAction = 2,
};
// Display an action graph human-readably. Action A is the "sink" node
// and latest-occurring action. Traversal is in pre-order, visiting the
// inputs to each action before printing the action itself.
static unsigned PrintActions1(const Compilation &C, Action *A,
std::map<Action *, unsigned> &Ids,
Twine Indent = {}, int Kind = TopLevelAction) {
if (Ids.count(A)) // A was already visited.
return Ids[A];
std::string str;
llvm::raw_string_ostream os(str);
auto getSibIndent = [](int K) -> Twine {
return (K == HeadSibAction) ? " " : (K == OtherSibAction) ? "| " : "";
};
Twine SibIndent = Indent + getSibIndent(Kind);
int SibKind = HeadSibAction;
os << Action::getClassName(A->getKind()) << ", ";
if (InputAction *IA = dyn_cast<InputAction>(A)) {
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}";
} else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
bool IsFirst = true;
OA->doOnEachDependence(
[&](Action *A, const ToolChain *TC, const char *BoundArch) {
assert(TC && "Unknown host toolchain");
// E.g. for two CUDA device dependences whose bound arch is sm_20 and
// sm_35 this will generate:
// "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
// (nvptx64-nvidia-cuda:sm_35) {#ID}
if (!IsFirst)
os << ", ";
os << '"';
os << A->getOffloadingKindPrefix();
os << " (";
os << TC->getTriple().normalize();
if (BoundArch)
os << ":" << BoundArch;
os << ")";
os << '"';
os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}";
IsFirst = false;
SibKind = OtherSibAction;
});
} else {
const ActionList *AL = &A->getInputs();
if (AL->size()) {
const char *Prefix = "{";
for (Action *PreRequisite : *AL) {
os << Prefix << PrintActions1(C, PreRequisite, Ids, SibIndent, SibKind);
Prefix = ", ";
SibKind = OtherSibAction;
}
os << "}";
} else
os << "{}";
}
// Append offload info for all options other than the offloading action
// itself (e.g. (cuda-device, sm_20) or (cuda-host)).
std::string offload_str;
llvm::raw_string_ostream offload_os(offload_str);
if (!isa<OffloadAction>(A)) {
auto S = A->getOffloadingKindPrefix();
if (!S.empty()) {
offload_os << ", (" << S;
if (A->getOffloadingArch())
offload_os << ", " << A->getOffloadingArch();
offload_os << ")";
}
}
auto getSelfIndent = [](int K) -> Twine {
return (K == HeadSibAction) ? "+- " : (K == OtherSibAction) ? "|- " : "";
};
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Indent + getSelfIndent(Kind) << Id << ": " << os.str() << ", "
<< types::getTypeName(A->getType()) << offload_os.str() << "\n";
return Id;
}
// Print the action graphs in a compilation C.
// For example "clang -c file1.c file2.c" is composed of two subgraphs.
void Driver::PrintActions(const Compilation &C) const {
std::map<Action *, unsigned> Ids;
for (Action *A : C.getActions())
PrintActions1(C, A, Ids);
}
/// Check whether the given input tree contains any compilation or
/// assembly actions.
static bool ContainsCompileOrAssembleAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
isa<AssembleJobAction>(A))
return true;
for (const Action *Input : A->inputs())
if (ContainsCompileOrAssembleAction(Input))
return true;
return false;
}
void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
const InputList &BAInputs) const {
DerivedArgList &Args = C.getArgs();
ActionList &Actions = C.getActions();
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
llvm::StringSet<> ArchNames;
SmallVector<const char *, 4> Archs;
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_arch)) {
// Validate the option here; we don't save the type here because its
// particular spelling may participate in other driver choices.
llvm::Triple::ArchType Arch =
tools::darwin::getArchTypeForMachOArchName(A->getValue());
if (Arch == llvm::Triple::UnknownArch) {
Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
continue;
}
A->claim();
if (ArchNames.insert(A->getValue()).second)
Archs.push_back(A->getValue());
}
}
// When there is no explicit arch for this platform, make sure we still bind
// the architecture (to the default) so that -Xarch_ is handled correctly.
if (!Archs.size())
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
BuildActions(C, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
for (Action* Act : SingleActions) {
// Make sure we can lipo this kind of output. If not (and it is an actual
// output) then we disallow, since we can't create an output file with the
// right name without overwriting it. We could remove this oddity by just
// changing the output names to include the arch, which would also fix
// -save-temps. Compatibility wins for now.
if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
<< types::getTypeName(Act->getType());
ActionList Inputs;
for (unsigned i = 0, e = Archs.size(); i != e; ++i)
Inputs.push_back(C.MakeAction<BindArchAction>(Act, Archs[i]));
// Lipo if necessary, we do it this way because we need to set the arch flag
// so that -Xarch_ gets overwritten.
if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing)
Actions.append(Inputs.begin(), Inputs.end());
else
Actions.push_back(C.MakeAction<LipoJobAction>(Inputs, Act->getType()));
// Handle debug info queries.
Arg *A = Args.getLastArg(options::OPT_g_Group);
bool enablesDebugInfo = A && !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_gstabs);
if ((enablesDebugInfo || willEmitRemarks(Args)) &&
ContainsCompileOrAssembleAction(Actions.back())) {
// Add a 'dsymutil' step if necessary, when debug info is enabled and we
// have a compile input. We need to run 'dsymutil' ourselves in such cases
// because the debug info will refer to a temporary object file which
// will be removed at the end of the compilation process.
if (Act->getType() == types::TY_Image) {
ActionList Inputs;
Inputs.push_back(Actions.back());
Actions.pop_back();
Actions.push_back(
C.MakeAction<DsymutilJobAction>(Inputs, types::TY_dSYM));
}
// Verify the debug info output.
if (Args.hasArg(options::OPT_verify_debug_info)) {
Action* LastAction = Actions.back();
Actions.pop_back();
Actions.push_back(C.MakeAction<VerifyDebugInfoJobAction>(
LastAction, types::TY_Nothing));
}
}
}
}
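// Check that the input file \p Value exists or can plausibly be found later
// (e.g. via %LIB% or /link arguments in CL mode); otherwise emit a driver
// error, possibly with a spelling suggestion, and return false.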
bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
types::ID Ty, bool TypoCorrect) const {
if (!getCheckInputsExist())
return true;
// stdin always exists.
if (Value == "-")
return true;
if (getVFS().exists(Value))
return true;
if (IsCLMode()) {
if (!llvm::sys::path::is_absolute(Twine(Value)) &&
llvm::sys::Process::FindInEnvPath("LIB", Value, ';'))
return true;
if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
// Arguments to the /link flag might cause the linker to search for object
// and library files in paths we don't know about. Don't error in such
// cases.
return true;
}
}
if (TypoCorrect) {
// Check if the filename is a typo for an option flag. OptTable thinks
// that all args that are not known options and that start with / are
// filenames, but e.g. `/diagnostic:caret` is more likely a typo for
// the option `/diagnostics:caret` than a reference to a file in the root
// directory.
unsigned IncludedFlagsBitmask;
unsigned ExcludedFlagsBitmask;
std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
getIncludeExcludeOptionFlagMasks(IsCLMode());
std::string Nearest;
if (getOpts().findNearest(Value, Nearest, IncludedFlagsBitmask,
ExcludedFlagsBitmask) <= 1) {
Diag(clang::diag::err_drv_no_such_file_with_suggestion)
<< Value << Nearest;
return false;
}
}
Diag(clang::diag::err_drv_no_such_file) << Value;
return false;
}
// Construct the list of inputs and their types.
void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
InputList &Inputs) const {
const llvm::opt::OptTable &Opts = getOpts();
// Track the current user specified (-x) input. We also explicitly track the
// argument used to set the type; we only want to claim the type when we
// actually use it, so we warn about unused -x arguments.
types::ID InputType = types::TY_Nothing;
Arg *InputTypeArg = nullptr;
// The last /TC or /TP option sets the input type to C or C++ globally.
if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC,
options::OPT__SLASH_TP)) {
InputTypeArg = TCTP;
InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
? types::TY_C
: types::TY_CXX;
Arg *Previous = nullptr;
bool ShowNote = false;
for (Arg *A :
Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) {
if (Previous) {
Diag(clang::diag::warn_drv_overriding_flag_option)
<< Previous->getSpelling() << A->getSpelling();
ShowNote = true;
}
Previous = A;
}
if (ShowNote)
Diag(clang::diag::note_drv_t_option_is_global);
// No driver mode exposes -x and /TC or /TP; we don't support mixing them.
assert(!Args.hasArg(options::OPT_x) && "-x and /TC or /TP is not allowed");
}
for (Arg *A : Args) {
if (A->getOption().getKind() == Option::InputClass) {
const char *Value = A->getValue();
types::ID Ty = types::TY_INVALID;
// Infer the input type if necessary.
if (InputType == types::TY_Nothing) {
// If there was an explicit arg for this, claim it.
if (InputTypeArg)
InputTypeArg->claim();
// stdin must be handled specially.
if (memcmp(Value, "-", 2) == 0) {
if (IsFlangMode()) {
Ty = types::TY_Fortran;
} else {
// If running with -E, treat as a C input (this changes the
// builtin macros, for example). This may be overridden by -ObjC
// below.
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
: clang::diag::err_drv_unknown_stdin_type);
Ty = types::TY_C;
}
} else {
// Otherwise lookup by extension.
// Fallback is C if invoked as C preprocessor, C++ if invoked with
// clang-cl /E, or Object otherwise.
// We use a host hook here because Darwin at least has its own
// idea of what .s is.
if (const char *Ext = strrchr(Value, '.'))
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
if (CCCIsCPP())
Ty = types::TY_C;
else if (IsCLMode() && Args.hasArgNoClaim(options::OPT_E))
Ty = types::TY_CXX;
else
Ty = types::TY_Object;
}
// If the driver is invoked as C++ compiler (like clang++ or c++) it
// should autodetect some input files as C++ for g++ compatibility.
if (CCCIsCXX()) {
types::ID OldTy = Ty;
Ty = types::lookupCXXTypeForCType(Ty);
if (Ty != OldTy)
Diag(clang::diag::warn_drv_treating_input_as_cxx)
<< getTypeName(OldTy) << getTypeName(Ty);
}
// If running with -fthinlto-index=, extensions that normally identify
// native object files actually identify LLVM bitcode files.
if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ) &&
Ty == types::TY_Object)
Ty = types::TY_LLVM_BC;
}
// -ObjC and -ObjC++ override the default language, but only for "source
// files". We just treat everything that isn't a linker input as a
// source file.
//
// FIXME: Clean this up if we move the phase sequence into the type.
if (Ty != types::TY_Object) {
if (Args.hasArg(options::OPT_ObjC))
Ty = types::TY_ObjC;
else if (Args.hasArg(options::OPT_ObjCXX))
Ty = types::TY_ObjCXX;
}
} else {
assert(InputTypeArg && "InputType set w/o InputTypeArg");
if (!InputTypeArg->getOption().matches(options::OPT_x)) {
// If emulating cl.exe, make sure that /TC and /TP don't affect input
// object files.
const char *Ext = strrchr(Value, '.');
if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object)
Ty = types::TY_Object;
}
if (Ty == types::TY_INVALID) {
Ty = InputType;
InputTypeArg->claim();
}
}
if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(Args, Value, types::TY_C,
/*TypoCorrect=*/false)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
if (DiagnoseInputExistence(Args, Value, types::TY_CXX,
/*TypoCorrect=*/false)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
A->claim();
} else if (A->getOption().hasFlag(options::LinkerInput)) {
// Just treat as object type, we could make a special type for this if
// necessary.
Inputs.push_back(std::make_pair(types::TY_Object, A));
} else if (A->getOption().matches(options::OPT_x)) {
InputTypeArg = A;
InputType = types::lookupTypeForTypeSpecifier(A->getValue());
A->claim();
// Follow gcc behavior and treat as linker input for invalid -x
// options. Its not clear why we shouldn't just revert to unknown; but
// this isn't very important, we might as well be bug compatible.
if (!InputType) {
Diag(clang::diag::err_drv_unknown_language) << A->getValue();
InputType = types::TY_Object;
}
} else if (A->getOption().getID() == options::OPT_U) {
assert(A->getNumValues() == 1 && "The /U option has one value.");
StringRef Val = A->getValue(0);
if (Val.find_first_of("/\\") != StringRef::npos) {
// Warn about e.g. "/Users/me/myfile.c".
Diag(diag::warn_slash_u_filename) << Val;
Diag(diag::note_use_dashdash);
}
}
}
if (CCCIsCPP() && Inputs.empty()) {
// If called as standalone preprocessor, stdin is processed
// if no other input is present.
Arg *A = MakeInputArg(Args, Opts, "-");
Inputs.push_back(std::make_pair(types::TY_C, A));
}
}
namespace {
/// Provides a convenient interface for different programming models to generate
/// the required device actions.
class OffloadingActionBuilder final {
/// Flag used to trace errors in the builder.
bool IsValid = false;
/// The compilation that is using this builder.
Compilation &C;
/// Map between an input argument and the offload kinds used to process it.
std::map<const Arg *, unsigned> InputArgToOffloadKindMap;
/// Builder interface. It doesn't build anything or keep any state.
class DeviceActionBuilder {
public:
typedef const llvm::SmallVectorImpl<phases::ID> PhasesTy;
enum ActionBuilderReturnCode {
// The builder acted successfully on the current action.
ABRT_Success,
// The builder didn't have to act on the current action.
ABRT_Inactive,
// The builder was successful and requested the host action to not be
// generated.
ABRT_Ignore_Host,
};
protected:
/// Compilation associated with this builder.
Compilation &C;
/// Tool chains associated with this builder. The same programming
/// model may have associated one or more tool chains.
SmallVector<const ToolChain *, 2> ToolChains;
/// The derived arguments associated with this builder.
DerivedArgList &Args;
/// The inputs associated with this builder.
const Driver::InputList &Inputs;
/// The associated offload kind.
Action::OffloadKind AssociatedOffloadKind = Action::OFK_None;
public:
DeviceActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind AssociatedOffloadKind)
: C(C), Args(Args), Inputs(Inputs),
AssociatedOffloadKind(AssociatedOffloadKind) {}
virtual ~DeviceActionBuilder() {}
/// Fill up the array \a DA with all the device dependences that should be
/// added to the provided host action \a HostAction. By default it is
/// inactive.
virtual ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) {
return ABRT_Inactive;
}
/// Update the state to include the provided host action \a HostAction as a
/// dependency of the current device action. By default it is inactive.
virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) {
return ABRT_Inactive;
}
/// Append top level actions generated by the builder.
virtual void appendTopLevelActions(ActionList &AL) {}
/// Append linker device actions generated by the builder.
virtual void appendLinkDeviceActions(ActionList &AL) {}
/// Append linker host action generated by the builder.
virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
/// Append linker actions generated by the builder.
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
/// Initialize the builder. Return true if any initialization errors are
/// found.
virtual bool initialize() { return false; }
/// Return true if the builder can use bundling/unbundling.
virtual bool canUseBundlerUnbundler() const { return false; }
/// Return true if this builder is valid. We have a valid builder if we have
/// associated device tool chains.
bool isValid() { return !ToolChains.empty(); }
/// Return the associated offload kind.
Action::OffloadKind getAssociatedOffloadKind() {
return AssociatedOffloadKind;
}
};
/// Base class for CUDA/HIP action builder. It injects device code in
/// the host backend action.
class CudaActionBuilderBase : public DeviceActionBuilder {
protected:
/// Flags to signal if the user requested host-only or device-only
/// compilation.
bool CompileHostOnly = false;
bool CompileDeviceOnly = false;
bool EmitLLVM = false;
bool EmitAsm = false;
/// ID to identify each device compilation. For CUDA it is simply the
/// GPU arch string. For HIP it is either the GPU arch string or GPU
/// arch string plus feature strings delimited by a plus sign, e.g.
/// gfx906+xnack.
struct TargetID {
/// Target ID string which is persistent throughout the compilation.
const char *ID;
TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
TargetID(const char *ID) : ID(ID) {}
operator const char *() { return ID; }
operator StringRef() { return StringRef(ID); }
};
/// List of GPU architectures to use in this compilation.
SmallVector<TargetID, 4> GpuArchList;
/// The CUDA actions for the current input.
ActionList CudaDeviceActions;
/// The CUDA fat binary if it was generated for the current input.
Action *CudaFatBinary = nullptr;
/// Flag that is set to true if this builder acted on the current input.
bool IsActive = false;
/// Flag for -fgpu-rdc.
bool Relocatable = false;
/// Default GPU architecture if there's no one specified.
CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
/// Method to generate compilation unit ID specified by option
/// '-fuse-cuid='.
enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid };
UseCUIDKind UseCUID = CUID_Hash;
/// Compilation unit ID specified by option '-cuid='.
StringRef FixedCUID;
public:
CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
Action::OffloadKind OFKind)
: DeviceActionBuilder(C, Args, Inputs, OFKind) {}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// While generating code for CUDA, we only depend on the host input action
// to trigger the creation of all the CUDA device actions.
// If we are dealing with an input action, replicate it for each GPU
// architecture. If we are in host-only mode we return 'success' so that
// the host uses the CUDA offload kind.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
assert(!GpuArchList.empty() &&
"We should have at least one GPU architecture.");
// If the host input is not CUDA or HIP, we don't need to bother about
// this input.
if (!(IA->getType() == types::TY_CUDA ||
IA->getType() == types::TY_HIP ||
IA->getType() == types::TY_PP_HIP)) {
// The builder will ignore this input.
IsActive = false;
return ABRT_Inactive;
}
// Set the flag to true, so that the builder acts on the current input.
IsActive = true;
if (CompileHostOnly)
return ABRT_Success;
// Replicate inputs for each GPU architecture.
auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
: types::TY_CUDA_DEVICE;
std::string CUID = FixedCUID.str();
if (CUID.empty()) {
if (UseCUID == CUID_Random)
CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(),
/*LowerCase=*/true);
else if (UseCUID == CUID_Hash) {
llvm::MD5 Hasher;
llvm::MD5::MD5Result Hash;
SmallString<256> RealPath;
llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath,
/*expand_tilde=*/true);
Hasher.update(RealPath);
for (auto *A : Args) {
if (A->getOption().matches(options::OPT_INPUT))
continue;
Hasher.update(A->getAsString(Args));
}
Hasher.final(Hash);
CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
}
}
IA->setId(CUID);
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
CudaDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), Ty, IA->getId()));
}
return ABRT_Success;
}
// If this is an unbundling action use it as is for each CUDA toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
// If -fgpu-rdc is disabled, should not unbundle since there is no
// device code to link.
if (UA->getType() == types::TY_Object && !Relocatable)
return ABRT_Inactive;
CudaDeviceActions.clear();
auto *IA = cast<InputAction>(UA->getInputs().back());
std::string FileName = IA->getInputArg().getAsString(Args);
// Check if the type of the file is the same as the action. Do not
// unbundle it if it is not. Do not unbundle .so files, for example,
// which are not object files.
if (IA->getType() == types::TY_Object &&
(!llvm::sys::path::has_extension(FileName) ||
types::lookupTypeForExtension(
llvm::sys::path::extension(FileName).drop_front()) !=
types::TY_Object))
return ABRT_Inactive;
for (auto Arch : GpuArchList) {
CudaDeviceActions.push_back(UA);
UA->registerDependentActionInfo(ToolChains[0], Arch,
AssociatedOffloadKind);
}
return ABRT_Success;
}
return IsActive ? ABRT_Success : ABRT_Inactive;
}
void appendTopLevelActions(ActionList &AL) override {
// Utility to append actions to the top level list.
auto AddTopLevel = [&](Action *A, TargetID TargetID) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
};
// If we have a fat binary, add it to the list.
if (CudaFatBinary) {
AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
CudaDeviceActions.clear();
CudaFatBinary = nullptr;
return;
}
if (CudaDeviceActions.empty())
return;
// If we have CUDA actions at this point, that's because we have a
// partial compilation, so we should have an action for each GPU
// architecture.
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(ToolChains.size() == 1 &&
"Expecting to have a sing CUDA toolchain.");
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
CudaDeviceActions.clear();
}
/// Get canonicalized offload arch option. \returns empty StringRef if the
/// option is invalid.
virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0;
virtual llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0;
bool initialize() override {
assert(AssociatedOffloadKind == Action::OFK_Cuda ||
AssociatedOffloadKind == Action::OFK_HIP);
// We don't need to support CUDA.
if (AssociatedOffloadKind == Action::OFK_Cuda &&
!C.hasOffloadToolChain<Action::OFK_Cuda>())
return false;
// We don't need to support HIP.
if (AssociatedOffloadKind == Action::OFK_HIP &&
!C.hasOffloadToolChain<Action::OFK_HIP>())
return false;
Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, /*Default=*/false);
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
if (HostTC->getTriple().isNVPTX() ||
HostTC->getTriple().getArch() == llvm::Triple::amdgcn) {
// We do not support targeting NVPTX/AMDGCN for host compilation. Throw
// an error and abort pipeline construction early so we don't trip
// asserts that assume device-side compilation.
C.getDriver().Diag(diag::err_drv_cuda_host_arch)
<< HostTC->getTriple().getArchName();
return true;
}
ToolChains.push_back(
AssociatedOffloadKind == Action::OFK_Cuda
? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
: C.getSingleOffloadToolChain<Action::OFK_HIP>());
Arg *PartialCompilationArg = Args.getLastArg(
options::OPT_cuda_host_only, options::OPT_cuda_device_only,
options::OPT_cuda_compile_host_device);
CompileHostOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_host_only);
CompileDeviceOnly = PartialCompilationArg &&
PartialCompilationArg->getOption().matches(
options::OPT_cuda_device_only);
EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
EmitAsm = Args.getLastArg(options::OPT_S);
FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) {
StringRef UseCUIDStr = A->getValue();
UseCUID = llvm::StringSwitch<UseCUIDKind>(UseCUIDStr)
.Case("hash", CUID_Hash)
.Case("random", CUID_Random)
.Case("none", CUID_None)
.Default(CUID_Invalid);
if (UseCUID == CUID_Invalid) {
C.getDriver().Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << UseCUIDStr;
C.setContainsError();
return true;
}
}
// Collect all cuda_gpu_arch parameters, removing duplicates.
std::set<StringRef> GpuArchs;
bool Error = false;
for (Arg *A : Args) {
if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
A->getOption().matches(options::OPT_no_offload_arch_EQ)))
continue;
A->claim();
StringRef ArchStr = A->getValue();
if (A->getOption().matches(options::OPT_no_offload_arch_EQ) &&
ArchStr == "all") {
GpuArchs.clear();
continue;
}
ArchStr = getCanonicalOffloadArch(ArchStr);
if (ArchStr.empty()) {
Error = true;
} else if (A->getOption().matches(options::OPT_offload_arch_EQ))
GpuArchs.insert(ArchStr);
else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
GpuArchs.erase(ArchStr);
else
llvm_unreachable("Unexpected option.");
}
auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs);
if (ConflictingArchs) {
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
<< ConflictingArchs.getValue().first
<< ConflictingArchs.getValue().second;
C.setContainsError();
return true;
}
// Collect list of GPUs remaining in the set.
for (auto Arch : GpuArchs)
GpuArchList.push_back(Arch.data());
// Default to sm_20 which is the lowest common denominator for
// supported GPUs. sm_20 code should work correctly, if
// suboptimally, on all newer GPUs.
if (GpuArchList.empty())
GpuArchList.push_back(DefaultCudaArch);
return Error;
}
};
/// \brief CUDA action builder. It injects device code in the host backend
/// action.
class CudaActionBuilder final : public CudaActionBuilderBase {
public:
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
DefaultCudaArch = CudaArch::SM_20;
}
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
CudaArch Arch = StringToCudaArch(ArchStr);
if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
return StringRef();
}
return CudaArchToString(Arch);
}
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
return llvm::None;
}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (!IsActive)
return ABRT_Inactive;
// If we don't have more CUDA actions, we don't have any dependences to
// create for the host.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
// If we are generating code for the device or we are in a backend phase,
// we attempt to generate the fat binary. We compile each arch to ptx and
// assemble to cubin, then feed the cubin *and* the ptx into a device
// "link" action, which uses fatbinary to combine these cubins into one
// fatbin. The fatbin is then an input to the host action if not in
// device-only mode.
if (CompileDeviceOnly || CurPhase == phases::Backend) {
ActionList DeviceActions;
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
// Produce the device action from the current phase up to the assemble
// phase.
for (auto Ph : Phases) {
// Skip the phases that were already dealt with.
if (Ph < CurPhase)
continue;
// We have to be consistent with the host final phase.
if (Ph > FinalPhase)
break;
CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
if (Ph == phases::Assemble)
break;
}
// If we didn't reach the assemble phase, we can't generate the fat
// binary. We don't need to generate the fat binary if we are not in
// device-only mode.
if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
CompileDeviceOnly)
continue;
Action *AssembleAction = CudaDeviceActions[I];
assert(AssembleAction->getType() == types::TY_Object);
assert(AssembleAction->getInputs().size() == 1);
Action *BackendAction = AssembleAction->getInputs()[0];
assert(BackendAction->getType() == types::TY_PP_Asm);
for (auto &A : {AssembleAction, BackendAction}) {
OffloadAction::DeviceDependences DDep;
DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda);
DeviceActions.push_back(
C.MakeAction<OffloadAction>(DDep, A->getType()));
}
}
// We generate the fat binary if we have device input actions.
if (!DeviceActions.empty()) {
CudaFatBinary =
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
Action::OFK_Cuda);
// Clear the fat binary, it is already a dependence of a host
// action.
CudaFatBinary = nullptr;
}
// Remove the CUDA actions as they are already connected to a host
// action or fat binary.
CudaDeviceActions.clear();
}
// We avoid creating host action in device-only mode.
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase > phases::Backend) {
// If we are past the backend phase and still have a device action, we
// don't have to do anything as this action is already a device
// top-level action.
return ABRT_Success;
}
assert(CurPhase < phases::Backend && "Generating single CUDA "
"instructions should only occur "
"before the backend phase!");
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
};
/// \brief HIP action builder. It injects device code in the host backend
/// action.
class HIPActionBuilder final : public CudaActionBuilderBase {
/// The linker inputs obtained for each device arch.
SmallVector<ActionList, 8> DeviceLinkerInputs;
bool GPUSanitize;
// The default bundling behavior depends on the type of output, therefore
// BundleOutput needs to be tri-value: None, true, or false.
// Bundle code objects unless --no-gpu-bundle-output is specified for
// device-only compilation. Bundle other types of output files only if
// --gpu-bundle-output is specified for device-only compilation.
Optional<bool> BundleOutput;
public:
HIPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
DefaultCudaArch = CudaArch::GFX803;
GPUSanitize = Args.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false);
if (Args.hasArg(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output))
BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output);
}
bool canUseBundlerUnbundler() const override { return true; }
StringRef getCanonicalOffloadArch(StringRef IdStr) override {
llvm::StringMap<bool> Features;
auto ArchStr =
parseTargetID(getHIPOffloadTargetTriple(), IdStr, &Features);
if (!ArchStr) {
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
C.setContainsError();
return StringRef();
}
auto CanId = getCanonicalTargetID(ArchStr.getValue(), Features);
return Args.MakeArgStringRef(CanId);
};
llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
return getConflictTargetIDCombination(GpuArchs);
}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
// amdgcn does not support linking of object files, therefore we skip
// the backend and assemble phases and output LLVM IR instead, except
// when generating non-relocatable device code, where we generate a fat
// binary for the device code and pass it to the host in the Backend
// phase.
if (CudaDeviceActions.empty())
return ABRT_Success;
assert(((CurPhase == phases::Link && Relocatable) ||
CudaDeviceActions.size() == GpuArchList.size()) &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");
if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
!EmitAsm) {
// If we are in backend phase, we attempt to generate the fat binary.
// We compile each arch to IR and use a link action to generate code
// object containing ISA. Then we use a special "link" action to create
// a fat binary containing all the code objects for different GPU's.
// The fat binary is then an input to the host action.
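// Rough shape of the graph built below for two archs (illustrative sketch):
//
//   IR(gfx906) -> device link -> OffloadAction \
//                                               "link" (HIP fatbin) -> host
//   IR(gfx908) -> device link -> OffloadAction /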
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
if (C.getDriver().isUsingLTO(/*IsOffload=*/true)) {
// When LTO is enabled, skip the backend and assemble phases and
// use lld to link the bitcode.
ActionList AL;
AL.push_back(CudaDeviceActions[I]);
// Create a link action to link device IR with device library
// and generate ISA.
CudaDeviceActions[I] =
C.MakeAction<LinkJobAction>(AL, types::TY_Image);
} else {
// When LTO is not enabled, we follow the conventional
// compiler phases, including backend and assemble phases.
ActionList AL;
auto BackendAction = C.getDriver().ConstructPhaseAction(
C, Args, phases::Backend, CudaDeviceActions[I],
AssociatedOffloadKind);
auto AssembleAction = C.getDriver().ConstructPhaseAction(
C, Args, phases::Assemble, BackendAction,
AssociatedOffloadKind);
AL.push_back(AssembleAction);
// Create a link action to link device IR with device library
// and generate ISA.
CudaDeviceActions[I] =
C.MakeAction<LinkJobAction>(AL, types::TY_Image);
}
// OffloadingActionBuilder propagates the device arch until it reaches
// an offload action. Since the next action, which creates the fatbin,
// has no device arch, whereas the above link action and its input do,
// an offload action is needed here to stop the null device arch of the
// next action from being propagated to the above link action.
OffloadAction::DeviceDependences DDep;
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
AssociatedOffloadKind);
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
DDep, CudaDeviceActions[I]->getType());
}
if (!CompileDeviceOnly || !BundleOutput.hasValue() ||
BundleOutput.getValue()) {
// Create HIP fat binary with a special "link" action.
CudaFatBinary = C.MakeAction<LinkJobAction>(CudaDeviceActions,
types::TY_HIP_FATBIN);
if (!CompileDeviceOnly) {
DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
AssociatedOffloadKind);
// Clear the fat binary; it is already a dependence of a host
// action.
CudaFatBinary = nullptr;
}
// Remove the CUDA actions as they are already connected to a host
// action or the fat binary.
CudaDeviceActions.clear();
}
return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
} else if (CurPhase == phases::Link) {
// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
// This happens for each device action originating from each input file.
// Later on, device actions in DeviceLinkerInputs are used to create
// device link actions in appendLinkDependences and the created device
// link actions are passed to the offload action as device dependence.
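// Illustrative example (assumed flags): compiling `a.hip b.hip` with
// `-fgpu-rdc --offload-arch=gfx906` accumulates the per-input device code
// for gfx906 here; appendLinkDeviceActions later emits one device link
// action per arch from these inputs.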
DeviceLinkerInputs.resize(CudaDeviceActions.size());
auto LI = DeviceLinkerInputs.begin();
for (auto *A : CudaDeviceActions) {
LI->push_back(A);
++LI;
}
// We will pass the device actions as a host dependence, so we don't
// need to do anything else with them here.
CudaDeviceActions.clear();
return ABRT_Success;
}
// By default, we produce an action for each device arch.
for (Action *&A : CudaDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
AssociatedOffloadKind);
if (CompileDeviceOnly && CurPhase == FinalPhase &&
BundleOutput.hasValue() && BundleOutput.getValue()) {
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
OffloadAction::DeviceDependences DDep;
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
AssociatedOffloadKind);
CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
DDep, CudaDeviceActions[I]->getType());
}
CudaFatBinary =
C.MakeAction<OffloadBundlingJobAction>(CudaDeviceActions);
CudaDeviceActions.clear();
}
return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
: ABRT_Success;
}
void appendLinkDeviceActions(ActionList &AL) override {
if (DeviceLinkerInputs.size() == 0)
return;
assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
"Linker inputs and GPU arch list sizes do not match.");
// Append a new link action for each device.
unsigned I = 0;
for (auto &LI : DeviceLinkerInputs) {
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
GpuArchList[I], AssociatedOffloadKind);
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
DeviceLinkAction->getType()));
++I;
}
DeviceLinkerInputs.clear();
// Create a host object from all the device images by embedding them
// in a fat binary.
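// Sketch of what is appended: the per-arch offload link actions already in
// AL feed a LinkJobAction producing a TY_Object, which is wrapped in an
// OffloadAction and handed to the host link as a regular input.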
OffloadAction::DeviceDependences DDeps;
auto *TopDeviceLinkAction =
C.MakeAction<LinkJobAction>(AL, types::TY_Object);
DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
nullptr, AssociatedOffloadKind);
// Offload the host object to the host linker.
AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
}
Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
};
/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
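/// Illustrative (assumed) invocation: `clang -fopenmp
/// -fopenmp-targets=nvptx64-nvidia-cuda foo.c` creates one device pipeline
/// per target triple, and the resulting device images become dependences of
/// the host link.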
class OpenMPActionBuilder final : public DeviceActionBuilder {
/// The OpenMP actions for the current input.
ActionList OpenMPDeviceActions;
/// The linker inputs obtained for each toolchain.
SmallVector<ActionList, 8> DeviceLinkerInputs;
public:
OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {
if (OpenMPDeviceActions.empty())
return ABRT_Inactive;
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// The host only depends on device action in the linking phase, when all
// the device images have to be embedded in the host image.
if (CurPhase == phases::Link) {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
auto LI = DeviceLinkerInputs.begin();
for (auto *A : OpenMPDeviceActions) {
LI->push_back(A);
++LI;
}
// We passed the device actions as a host dependence, so we don't need to
// do anything else with them.
OpenMPDeviceActions.clear();
return ABRT_Success;
}
// By default, we produce an action for each device arch.
for (Action *&A : OpenMPDeviceActions)
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
return ABRT_Success;
}
ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
// If this is an input action replicate it for each OpenMP toolchain.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
OpenMPDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I)
OpenMPDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
return ABRT_Success;
}
// If this is an unbundling action, use it as is for each OpenMP toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
OpenMPDeviceActions.clear();
auto *IA = cast<InputAction>(UA->getInputs().back());
std::string FileName = IA->getInputArg().getAsString(Args);
// Check if the type of the file matches the type of the action. Do not
// unbundle it if it does not. For example, do not unbundle .so files,
// which are not object files.
if (IA->getType() == types::TY_Object &&
(!llvm::sys::path::has_extension(FileName) ||
types::lookupTypeForExtension(
llvm::sys::path::extension(FileName).drop_front()) !=
types::TY_Object))
return ABRT_Inactive;
for (unsigned I = 0; I < ToolChains.size(); ++I) {
OpenMPDeviceActions.push_back(UA);
UA->registerDependentActionInfo(
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP);
}
return ABRT_Success;
}
// When generating code for OpenMP we use the host compile phase result as
// a dependence of the device compile phase so that it can learn what
// declarations should be emitted. However, this is not the only use for
// the host action, so we prevent it from being collapsed.
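// Resulting shape (sketch):
//   host CompileJobAction --host dep--> OffloadAction <--device dep-- device CompileJobAction
// so the device frontend can consume the host bitcode when deciding which
// declarations to emit.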
if (isa<CompileJobAction>(HostAction)) {
HostAction->setCannotBeCollapsedWithNextDependentAction();
assert(ToolChains.size() == OpenMPDeviceActions.size() &&
"Toolchains and device action sizes do not match.");
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, Action::OFK_OpenMP);
auto TC = ToolChains.begin();
for (Action *&A : OpenMPDeviceActions) {
assert(isa<CompileJobAction>(A));
OffloadAction::DeviceDependences DDep;
DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
A = C.MakeAction<OffloadAction>(HDep, DDep);
++TC;
}
}
return ABRT_Success;
}
void appendTopLevelActions(ActionList &AL) override {
if (OpenMPDeviceActions.empty())
return;
// We should always have an action for each input.
assert(OpenMPDeviceActions.size() == ToolChains.size() &&
"Number of OpenMP actions and toolchains do not match.");
// Append all device actions followed by the proper offload action.
auto TI = ToolChains.begin();
for (auto *A : OpenMPDeviceActions) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
++TI;
}
// We no longer need the action stored in this builder.
OpenMPDeviceActions.clear();
}
void appendLinkDeviceActions(ActionList &AL) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
// Append a new link action for each device.
auto TC = ToolChains.begin();
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
Action::OFK_OpenMP);
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
DeviceLinkAction->getType()));
++TC;
}
DeviceLinkerInputs.clear();
}
Action* appendLinkHostActions(ActionList &AL) override {
// Create wrapper bitcode from the result of device link actions and compile
// it to an object which will be added to the host link command.
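// Pipeline sketch: device link results -> OffloadWrapperJobAction (LLVM BC)
// -> BackendJobAction (assembly) -> AssembleJobAction (object); the object
// is returned so it can be added to the host link.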
auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
}
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
bool initialize() override {
// Get the OpenMP toolchains. If we don't get any, the action builder will
// know there is nothing to do related to OpenMP offloading.
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
++TI)
ToolChains.push_back(TI->second);
DeviceLinkerInputs.resize(ToolChains.size());
return false;
}
bool canUseBundlerUnbundler() const override {
// OpenMP should use bundled files whenever possible.
return true;
}
};
///
/// TODO: Add the implementation for other specialized builders here.
///
/// Specialized builders being used by this offloading action builder.
SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
/// Flag set to true if all valid builders allow file bundling/unbundling.
bool CanUseBundler;
public:
OffloadingActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: C(C) {
// Create a specialized builder for each device toolchain.
IsValid = true;
// Create a specialized builder for CUDA.
SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
// Create a specialized builder for HIP.
SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs));
// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
//
// TODO: Build other specialized builders here.
//
// Initialize all the builders, keeping track of errors. If all valid
// builders agree that we can use bundling, set the flag to true.
unsigned ValidBuilders = 0u;
unsigned ValidBuildersSupportingBundling = 0u;
for (auto *SB : SpecializedBuilders) {
IsValid = IsValid && !SB->initialize();
// Update the counters if the builder is valid.
if (SB->isValid()) {
++ValidBuilders;
if (SB->canUseBundlerUnbundler())
++ValidBuildersSupportingBundling;
}
}
CanUseBundler =
ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling;
}
~OffloadingActionBuilder() {
for (auto *SB : SpecializedBuilders)
delete SB;
}
/// Generate an action that adds device dependences (if any) to a host action.
/// If no device dependence actions exist, just return the host action \a
/// HostAction. If an error is found or if no builder requires the host action
/// to be generated, return nullptr.
Action *
addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg,
phases::ID CurPhase, phases::ID FinalPhase,
DeviceActionBuilder::PhasesTy &Phases) {
if (!IsValid)
return nullptr;
if (SpecializedBuilders.empty())
return HostAction;
assert(HostAction && "Invalid host action!");
OffloadAction::DeviceDependences DDeps;
// Check if all the programming models agree we should not emit the host
// action. Also, keep track of the offloading kinds employed.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
unsigned InactiveBuilders = 0u;
unsigned IgnoringBuilders = 0u;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid()) {
++InactiveBuilders;
continue;
}
auto RetCode =
SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
// If the builder explicitly says the host action should be ignored,
// we need to increment the variable that tracks the builders that request
// the host object to be ignored.
if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host)
++IgnoringBuilders;
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
// If all builders agree that the host object should be ignored, just return
// nullptr.
if (IgnoringBuilders &&
SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders))
return nullptr;
if (DDeps.getActions().empty())
return HostAction;
// We have dependences we need to bundle together. We use an offload action
// for that.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, DDeps);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
/// Generate an action that adds a host dependence to a device action. The
/// results will be kept in this action builder. Return true if an error was
/// found.
bool addHostDependenceToDeviceActions(Action *&HostAction,
const Arg *InputArg) {
if (!IsValid)
return true;
// If we support bundling/unbundling and the current action is an input
// action for a non-source file, we replace the host action with the
// unbundling action. The bundler tool has the logic to detect whether an
// input is a bundle or not, and if the input is not a bundle it assumes
// it is a host file. Therefore it is safe to create an unbundling action
// even if the input is not a bundle.
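// For example (illustrative): an object produced earlier with offloading
// enabled is a bundle carrying one entry per target, while a plain host
// object passes through the unbundler unchanged as the host entry.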
if (CanUseBundler && isa<InputAction>(HostAction) &&
InputArg->getOption().getKind() == llvm::opt::Option::InputClass &&
(!types::isSrcFile(HostAction->getType()) ||
HostAction->getType() == types::TY_PP_HIP)) {
auto UnbundlingHostAction =
C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
UnbundlingHostAction->registerDependentActionInfo(
C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/StringRef(), Action::OFK_Host);
HostAction = UnbundlingHostAction;
}
assert(HostAction && "Invalid host action!");
// Register the offload kinds that are used.
auto &OffloadKind = InputArgToOffloadKindMap[InputArg];
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
auto RetCode = SB->addDeviceDepences(HostAction);
// Host dependences for device actions are not compatible with that same
// action being ignored.
assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host &&
"Host dependence not expected to be ignored.!");
// Unless the builder was inactive for this action, we have to record the
// offload kind because the host will have to use it.
if (RetCode != DeviceActionBuilder::ABRT_Inactive)
OffloadKind |= SB->getAssociatedOffloadKind();
}
// Do not use unbundler if the Host does not depend on device action.
if (OffloadKind == Action::OFK_None && CanUseBundler)
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction))
HostAction = UA->getInputs().back();
return false;
}
/// Add the offloading top level actions to the provided action list. This
/// function can replace the host action by a bundling action if the
/// programming models allow it.
bool appendTopLevelActions(ActionList &AL, Action *HostAction,
const Arg *InputArg) {
// Get the device actions to be appended.
ActionList OffloadAL;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendTopLevelActions(OffloadAL);
}
// If we can use the bundler, replace the host action with the bundling one
// in the resulting list. Otherwise, just append the device actions. For
// device-only compilation, HostAction is a null pointer, therefore only do
// this when HostAction is not a null pointer.
if (CanUseBundler && HostAction &&
HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) {
// Add the host action to the list in order to create the bundling action.
OffloadAL.push_back(HostAction);
// We expect that the host action was just appended to the action list
// before this method was called.
assert(HostAction == AL.back() && "Host action not in the list??");
HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL);
AL.back() = HostAction;
} else
AL.append(OffloadAL.begin(), OffloadAL.end());
// Propagate to the current host action (if any) the offload information
// associated with the current input.
if (HostAction)
HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg],
/*BoundArch=*/nullptr);
return false;
}
Action* makeHostLinkAction() {
// Build a list of device linking actions.
ActionList DeviceAL;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDeviceActions(DeviceAL);
}
if (DeviceAL.empty())
return nullptr;
// Let builders add host linking actions.
Action* HA = nullptr;
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
HA = SB->appendLinkHostActions(DeviceAL);
}
return HA;
}
/// Processes the host linker action. This currently consists of replacing it
/// with an offload action if there are device link objects, and propagating to
/// the host action all the offload kinds used in the current compilation. The
/// resulting action is returned.
Action *processHostLinkAction(Action *HostAction) {
// Add all the dependences from the device linking actions.
OffloadAction::DeviceDependences DDeps;
for (auto *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
SB->appendLinkDependences(DDeps);
}
// Calculate all the offload kinds used in the current compilation.
unsigned ActiveOffloadKinds = 0u;
for (auto &I : InputArgToOffloadKindMap)
ActiveOffloadKinds |= I.second;
// If we don't have device dependencies, we don't have to create an offload
// action.
if (DDeps.getActions().empty()) {
// Propagate all the active kinds to host action. Given that it is a link
// action it is assumed to depend on all actions generated so far.
HostAction->propagateHostOffloadInfo(ActiveOffloadKinds,
/*BoundArch=*/nullptr);
return HostAction;
}
// Create the offload action with all dependences. When an offload action
// is created the kinds are propagated to the host action, so we don't have
// to do that explicitly here.
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch*/ nullptr, ActiveOffloadKinds);
return C.MakeAction<OffloadAction>(HDep, DDeps);
}
};
} // anonymous namespace.
void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
const InputList &Inputs,
ActionList &Actions) const {
// Ignore /Yc/Yu if both /Yc and /Yu passed but with different filenames.
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
if (YcArg && Inputs.size() > 1) {
Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
Args.eraseArg(options::OPT__SLASH_Yc);
YcArg = nullptr;
}
Arg *FinalPhaseArg;
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
if (Args.hasArg(options::OPT_emit_llvm))
Diag(clang::diag::err_drv_emit_llvm_link);
if (IsCLMode() && LTOMode != LTOK_None &&
!Args.getLastArgValue(options::OPT_fuse_ld_EQ)
.equals_insensitive("lld"))
Diag(clang::diag::err_drv_lto_without_lld);
}
if (FinalPhase == phases::Preprocess || Args.hasArg(options::OPT__SLASH_Y_)) {
// If only preprocessing or /Y- is used, all pch handling is disabled.
// Rather than check for it everywhere, just remove clang-cl pch-related
// flags here.
Args.eraseArg(options::OPT__SLASH_Fp);
Args.eraseArg(options::OPT__SLASH_Yc);
Args.eraseArg(options::OPT__SLASH_Yu);
YcArg = YuArg = nullptr;
}
unsigned LastPLSize = 0;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
auto PL = types::getCompilationPhases(InputType);
LastPLSize = PL.size();
// If the first step comes after the final phase we are doing as part of
// this compilation, warn the user about it.
phases::ID InitialPhase = PL[0];
if (InitialPhase > FinalPhase) {
if (InputArg->isClaimed())
continue;
// Claim here to avoid the more general unused warning.
InputArg->claim();
// Suppress all unused style warnings with -Qunused-arguments
if (Args.hasArg(options::OPT_Qunused_arguments))
continue;
// Special case when final phase determined by binary name, rather than
// by a command-line argument with a corresponding Arg.
if (CCCIsCPP())
Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase);
// Special case '-E' warning on a previously preprocessed file to make
// more sense.
else if (InitialPhase == phases::Compile &&
(Args.getLastArg(options::OPT__SLASH_EP,
options::OPT__SLASH_P) ||
Args.getLastArg(options::OPT_E) ||
Args.getLastArg(options::OPT_M, options::OPT_MM)) &&
getPreprocessedType(InputType) == types::TY_INVALID)
Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
<< InputArg->getAsString(Args) << !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
else
Diag(clang::diag::warn_drv_input_file_unused)
<< InputArg->getAsString(Args) << getPhaseName(InitialPhase)
<< !!FinalPhaseArg
<< (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
continue;
}
if (YcArg) {
// Add a separate precompile phase for the compile phase.
if (FinalPhase >= phases::Compile) {
const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
// Build the pipeline for the pch file.
Action *ClangClPch = C.MakeAction<InputAction>(*InputArg, HeaderType);
for (phases::ID Phase : types::getCompilationPhases(HeaderType))
ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
assert(ClangClPch);
Actions.push_back(ClangClPch);
// The driver currently exits after the first failed command. This
// relies on that behavior, to make sure if the pch generation fails,
// the main compilation won't run.
// FIXME: If the main compilation fails, the PCH generation should
// probably not be considered successful either.
}
}
}
// If we are linking, claim any options which are obviously only used for
// compilation.
// FIXME: Understand why the last Phase List length is used here.
if (FinalPhase == phases::Link && LastPLSize == 1) {
Args.ClaimAllArgs(options::OPT_CompileOnly_Group);
Args.ClaimAllArgs(options::OPT_cl_compile_Group);
}
}
void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Diag(clang::diag::err_drv_no_input_files);
return;
}
// Reject -Z* at the top level, these options should never have been exposed
// by gcc.
if (Arg *A = Args.getLastArg(options::OPT_Z_Joined))
Diag(clang::diag::err_drv_use_of_Z_option) << A->getAsString(Args);
// Diagnose misuse of /Fo.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fo tries to name an output file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fo);
}
}
// Diagnose misuse of /Fa.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) {
StringRef V = A->getValue();
if (Inputs.size() > 1 && !V.empty() &&
!llvm::sys::path::is_separator(V.back())) {
// Check whether /Fa tries to name an asm file for multiple inputs.
Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
<< A->getSpelling() << V;
Args.eraseArg(options::OPT__SLASH_Fa);
}
}
// Diagnose misuse of /o.
if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) {
if (A->getValue()[0] == '\0') {
// It has to have a value.
Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1;
Args.eraseArg(options::OPT__SLASH_o);
}
}
handleArguments(C, Args, Inputs, Actions);
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
// Construct the actions to perform.
HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr;
ActionList LinkerInputs;
ActionList MergerInputs;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
auto PL = types::getCompilationPhases(*this, Args, InputType);
if (PL.empty())
continue;
auto FullPL = types::getCompilationPhases(InputType);
// Build the pipeline for this file.
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
for (phases::ID Phase : PL) {
// Add any offload action the host action depends on.
Current = OffloadBuilder.addDeviceDependencesToHostAction(
Current, InputArg, Phase, PL.back(), FullPL);
if (!Current)
break;
// Queue linker inputs.
if (Phase == phases::Link) {
assert(Phase == PL.back() && "linking must be final compilation step.");
LinkerInputs.push_back(Current);
Current = nullptr;
break;
}
// TODO: Consider removing this because the merge may not end up being
// the final Phase in the pipeline. Perhaps the merger could just merge
// and then pass an artifact of some sort to the Link Phase.
// Queue merger inputs.
if (Phase == phases::IfsMerge) {
assert(Phase == PL.back() && "merging must be final compilation step.");
MergerInputs.push_back(Current);
Current = nullptr;
break;
}
// Each precompiled header file after a module file action is a module
// header of that same module file, rather than being compiled to a
// separate PCH.
if (Phase == phases::Precompile && HeaderModuleAction &&
getPrecompiledType(InputType) == types::TY_PCH) {
HeaderModuleAction->addModuleHeaderInput(Current);
Current = nullptr;
break;
}
// FIXME: Should we include any prior module file outputs as inputs of
// later actions in the same command line?
// Otherwise construct the appropriate action.
Action *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current);
// We didn't create a new action, so we will just move to the next phase.
if (NewCurrent == Current)
continue;
if (auto *HMA = dyn_cast<HeaderModulePrecompileJobAction>(NewCurrent))
HeaderModuleAction = HMA;
Current = NewCurrent;
// Use the current host action in any of the offloading actions, if
// required.
if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
break;
if (Current->getType() == types::TY_Nothing)
break;
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
// Add any top level actions generated for offloading.
OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
}
// Add a link action if necessary.
if (!LinkerInputs.empty()) {
if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
LinkerInputs.push_back(Wrapper);
Action *LA;
// Check if this Linker Job should emit a static library.
if (ShouldEmitStaticLibrary(Args)) {
LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image);
} else {
LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
}
LA = OffloadBuilder.processHostLinkAction(LA);
Actions.push_back(LA);
}
// Add an interface stubs merge action if necessary.
if (!MergerInputs.empty())
Actions.push_back(
C.MakeAction<IfsMergeJobAction>(MergerInputs, types::TY_Image));
if (Args.hasArg(options::OPT_emit_interface_stubs)) {
auto PhaseList = types::getCompilationPhases(
types::TY_IFS_CPP,
Args.hasArg(options::OPT_c) ? phases::Compile : phases::LastPhase);
ActionList MergerInputs;
for (auto &I : Inputs) {
types::ID InputType = I.first;
const Arg *InputArg = I.second;
// Currently clang and the llvm assembler do not support generating symbol
// stubs from assembly, so we skip asm-file inputs here. For ifs files we
// rely on the normal pipeline setup above.
if (InputType == types::TY_IFS || InputType == types::TY_PP_Asm ||
InputType == types::TY_Asm)
continue;
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
for (auto Phase : PhaseList) {
switch (Phase) {
default:
llvm_unreachable(
"IFS Pipeline can only consist of Compile followed by IfsMerge.");
case phases::Compile: {
// Only IfsMerge (llvm-ifs) can handle .o files by looking for ifs
// files where the .o file is located. The compile action cannot
// handle this.
if (InputType == types::TY_Object)
break;
Current = C.MakeAction<CompileJobAction>(Current, types::TY_IFS_CPP);
break;
}
case phases::IfsMerge: {
assert(Phase == PhaseList.back() &&
"merging must be final compilation step.");
MergerInputs.push_back(Current);
Current = nullptr;
break;
}
}
}
// If we ended with something, add to the output list.
if (Current)
Actions.push_back(Current);
}
// Add an interface stubs merge action if necessary.
if (!MergerInputs.empty())
Actions.push_back(
C.MakeAction<IfsMergeJobAction>(MergerInputs, types::TY_Image));
}
// If --print-supported-cpus, -mcpu=? or -mtune=? is specified, build a custom
// Compile phase that prints out supported cpu models and quits.
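// (Illustrative: `clang --print-supported-cpus` and `clang -mcpu=? foo.c`
// are both expected to reach this path; any real inputs are claimed and the
// previously built actions are discarded.)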
if (Arg *A = Args.getLastArg(options::OPT_print_supported_cpus)) {
// Use the -mcpu=? flag as the dummy input to cc1.
Actions.clear();
Action *InputAc = C.MakeAction<InputAction>(*A, types::TY_C);
Actions.push_back(
C.MakeAction<PrecompileJobAction>(InputAc, types::TY_Nothing));
for (auto &I : Inputs)
I.second->claim();
}
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
// Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
// to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
Action *Driver::ConstructPhaseAction(
Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Some types skip the assembler phase (e.g., llvm-bc), but we can't
// encode this in the steps because the intermediate type depends on
// arguments. Just special case here.
if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm)
return Input;
// Build the appropriate action.
switch (Phase) {
case phases::Link:
llvm_unreachable("link action invalid here.");
case phases::IfsMerge:
llvm_unreachable("ifsmerge action invalid here.");
case phases::Preprocess: {
types::ID OutputTy;
// -M and -MM specify the dependency file name by altering the output type,
// if -MD and -MMD are not specified.
if (Args.hasArg(options::OPT_M, options::OPT_MM) &&
!Args.hasArg(options::OPT_MD, options::OPT_MMD)) {
OutputTy = types::TY_Dependencies;
} else {
OutputTy = Input->getType();
if (!Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) &&
!Args.hasFlag(options::OPT_frewrite_imports,
options::OPT_fno_rewrite_imports, false) &&
!CCGenDiagnostics)
OutputTy = types::getPreprocessedType(OutputTy);
assert(OutputTy != types::TY_INVALID &&
"Cannot preprocess this input type!");
}
return C.MakeAction<PreprocessJobAction>(Input, OutputTy);
}
case phases::Precompile: {
types::ID OutputTy = getPrecompiledType(Input->getType());
assert(OutputTy != types::TY_INVALID &&
"Cannot precompile this input type!");
// If we're given a module name, precompile header file inputs as a
// module, not as a precompiled header.
const char *ModName = nullptr;
if (OutputTy == types::TY_PCH) {
if (Arg *A = Args.getLastArg(options::OPT_fmodule_name_EQ))
ModName = A->getValue();
if (ModName)
OutputTy = types::TY_ModuleFile;
}
if (Args.hasArg(options::OPT_fsyntax_only)) {
// Syntax checks should not emit a PCH file
OutputTy = types::TY_Nothing;
}
if (ModName)
return C.MakeAction<HeaderModulePrecompileJobAction>(Input, OutputTy,
ModName);
return C.MakeAction<PrecompileJobAction>(Input, OutputTy);
}
case phases::Compile: {
if (Args.hasArg(options::OPT_fsyntax_only))
return C.MakeAction<CompileJobAction>(Input, types::TY_Nothing);
if (Args.hasArg(options::OPT_rewrite_objc))
return C.MakeAction<CompileJobAction>(Input, types::TY_RewrittenObjC);
if (Args.hasArg(options::OPT_rewrite_legacy_objc))
return C.MakeAction<CompileJobAction>(Input,
types::TY_RewrittenLegacyObjC);
if (Args.hasArg(options::OPT__analyze))
return C.MakeAction<AnalyzeJobAction>(Input, types::TY_Plist);
if (Args.hasArg(options::OPT__migrate))
return C.MakeAction<MigrateJobAction>(Input, types::TY_Remap);
if (Args.hasArg(options::OPT_emit_ast))
return C.MakeAction<CompileJobAction>(Input, types::TY_AST);
if (Args.hasArg(options::OPT_module_file_info))
return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
if (Args.hasArg(options::OPT_verify_pch))
return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
}
case phases::Backend: {
if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (Args.hasArg(options::OPT_emit_llvm) ||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
}
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
}
case phases::Assemble:
return C.MakeAction<AssembleJobAction>(std::move(Input), types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
}
void Driver::BuildJobs(Compilation &C) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
// It is an error to provide a -o option if we are making multiple output
// files. There are exceptions:
//
// IfsMergeJob: when generating interface stubs is enabled we want to be able
// to generate the stub file at the same time that we generate the real
// library/a.out. So when a .o, .so, etc. are the output, with clang interface
// stubs there will also be a .ifs and .ifso at the same location.
//
// CompileJob of type TY_IFS_CPP: when generating interface stubs is enabled
// and -c is passed, we still want to be able to generate a .ifs file while
// we are also generating .o files. So we allow more than one output file in
// this case as well.
//
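// For example (illustrative): `clang -emit-interface-stubs -c -o foo.o foo.c`
// is expected to produce foo.o plus an interface-stub file for the same
// input, so the extra TY_IFS_CPP compile output is not counted against -o.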
if (FinalOutput) {
unsigned NumOutputs = 0;
unsigned NumIfsOutputs = 0;
for (const Action *A : C.getActions())
if (A->getType() != types::TY_Nothing &&
!(A->getKind() == Action::IfsMergeJobClass ||
(A->getType() == clang::driver::types::TY_IFS_CPP &&
A->getKind() == clang::driver::Action::CompileJobClass &&
0 == NumIfsOutputs++) ||
(A->getKind() == Action::BindArchClass && A->getInputs().size() &&
A->getInputs().front()->getKind() == Action::IfsMergeJobClass)))
++NumOutputs;
if (NumOutputs > 1) {
Diag(clang::diag::err_drv_output_argument_with_multiple_files);
FinalOutput = nullptr;
}
}
const llvm::Triple &RawTriple = C.getDefaultToolChain().getTriple();
if (RawTriple.isOSAIX()) {
if (Arg *A = C.getArgs().getLastArg(options::OPT_G))
Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
if (LTOMode == LTOK_Thin)
Diag(diag::err_drv_clang_unsupported) << "thinLTO on AIX";
}
// Collect the list of architectures.
llvm::StringSet<> ArchNames;
if (RawTriple.isOSBinFormatMachO())
for (const Arg *A : C.getArgs())
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
// doesn't fit in cleanly because we have to pass this information down.
//
// FIXME: This is a hack; find a cleaner way to integrate this into the
// process.
const char *LinkingOutput = nullptr;
if (isa<LipoJobAction>(A)) {
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else
LinkingOutput = getDefaultImageName();
}
BuildJobsForAction(C, A, &C.getDefaultToolChain(),
/*BoundArch*/ StringRef(),
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
/*LinkingOutput*/ LinkingOutput, CachedResults,
/*TargetDeviceOffloadKind*/ Action::OFK_None);
}
// If we have more than one job, then disable integrated-cc1 for now. Do this
// also when we need to report process execution statistics.
if (C.getJobs().size() > 1 || CCPrintProcessStats)
for (auto &J : C.getJobs())
J.InProcess = false;
if (CCPrintProcessStats) {
C.setPostCallback([=](const Command &Cmd, int Res) {
Optional<llvm::sys::ProcessStatistics> ProcStat =
Cmd.getProcessStatistics();
if (!ProcStat)
return;
const char *LinkingOutput = nullptr;
if (FinalOutput)
LinkingOutput = FinalOutput->getValue();
else if (!Cmd.getOutputFilenames().empty())
LinkingOutput = Cmd.getOutputFilenames().front().c_str();
else
LinkingOutput = getDefaultImageName();
if (CCPrintStatReportFilename.empty()) {
using namespace llvm;
// Human readable output.
outs() << sys::path::filename(Cmd.getExecutable()) << ": "
<< "output=" << LinkingOutput;
outs() << ", total="
<< format("%.3f", ProcStat->TotalTime.count() / 1000.) << " ms"
<< ", user="
<< format("%.3f", ProcStat->UserTime.count() / 1000.) << " ms"
<< ", mem=" << ProcStat->PeakMemory << " Kb\n";
} else {
// CSV format.
std::string Buffer;
llvm::raw_string_ostream Out(Buffer);
llvm::sys::printArg(Out, llvm::sys::path::filename(Cmd.getExecutable()),
/*Quote*/ true);
Out << ',';
llvm::sys::printArg(Out, LinkingOutput, true);
Out << ',' << ProcStat->TotalTime.count() << ','
<< ProcStat->UserTime.count() << ',' << ProcStat->PeakMemory
<< '\n';
Out.flush();
std::error_code EC;
llvm::raw_fd_ostream OS(CCPrintStatReportFilename.c_str(), EC,
llvm::sys::fs::OF_Append |
llvm::sys::fs::OF_Text);
if (EC)
return;
auto L = OS.lock();
if (!L) {
llvm::errs() << "ERROR: Cannot lock file "
<< CCPrintStatReportFilename << ": "
<< toString(L.takeError()) << "\n";
return;
}
OS << Buffer;
OS.flush();
}
});
}
// If the user passed -Qunused-arguments or there were errors, don't warn
// about any unused arguments.
if (Diags.hasErrorOccurred() ||
C.getArgs().hasArg(options::OPT_Qunused_arguments))
return;
// Claim -### here.
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
// Claim --driver-mode, --rsp-quoting, it was handled earlier.
(void)C.getArgs().hasArg(options::OPT_driver_mode);
(void)C.getArgs().hasArg(options::OPT_rsp_quoting);
for (Arg *A : C.getArgs()) {
// FIXME: It would be nice to be able to send the argument to the
// DiagnosticsEngine, so that extra values, position, and so on could be
// printed.
if (!A->isClaimed()) {
if (A->getOption().hasFlag(options::NoArgumentUnused))
continue;
// Suppress the warning automatically if this is just a flag, and it is an
// instance of an argument we already claimed.
const Option &Opt = A->getOption();
if (Opt.getKind() == Option::FlagClass) {
bool DuplicateClaimed = false;
for (const Arg *AA : C.getArgs().filtered(&Opt)) {
if (AA->isClaimed()) {
DuplicateClaimed = true;
break;
}
}
if (DuplicateClaimed)
continue;
}
// In clang-cl, don't mention unknown arguments here since they have
// already been warned about.
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
Diag(clang::diag::warn_drv_unused_argument)
<< A->getAsString(C.getArgs());
}
}
}
namespace {
/// Utility class to control the collapse of dependent actions and select the
/// tools accordingly.
class ToolSelector final {
/// The tool chain this selector refers to.
const ToolChain &TC;
/// The compilation this selector refers to.
const Compilation &C;
/// The base action this selector refers to.
const JobAction *BaseAction;
/// Set to true if the current toolchain refers to host actions.
bool IsHostSelector;
/// Set to true if save-temps and embed-bitcode functionalities are active.
bool SaveTemps;
bool EmbedBitcode;
/// Get previous dependent action or null if that does not exist. If
/// \a CanBeCollapsed is false, that action must be legal to collapse or
/// null will be returned.
const JobAction *getPrevDependentAction(const ActionList &Inputs,
ActionList &SavedOffloadAction,
bool CanBeCollapsed = true) {
// An action can be collapsed only if it has a single input.
if (Inputs.size() != 1)
return nullptr;
Action *CurAction = *Inputs.begin();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
// If the input action is an offload action, look through it and save any
// offload action that can be dropped in the event of a collapse.
if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
// If the dependent action is a device action, we will attempt to collapse
// only with other device actions. Otherwise, we would do the same but
// with host actions only.
if (!IsHostSelector) {
if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) {
CurAction =
OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true);
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
}
} else if (OA->hasHostDependence()) {
CurAction = OA->getHostDependence();
if (CanBeCollapsed &&
!CurAction->isCollapsingWithNextDependentActionLegal())
return nullptr;
SavedOffloadAction.push_back(OA);
return dyn_cast<JobAction>(CurAction);
}
return nullptr;
}
return dyn_cast<JobAction>(CurAction);
}
/// Return true if an assemble action can be collapsed.
bool canCollapseAssembleAction() const {
return TC.useIntegratedAs() && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
!C.getArgs().hasArg(options::OPT__SLASH_Fa);
}
/// Return true if a preprocessor action can be collapsed.
bool canCollapsePreprocessorAction() const {
return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_rewrite_objc);
}
/// Struct that relates an action with the offload actions that would be
/// collapsed with it.
struct JobActionInfo final {
/// The action this info refers to.
const JobAction *JA = nullptr;
/// The offload actions we need to take care of if this action is
/// collapsed.
ActionList SavedOffloadAction;
};
/// Append collapsed offload actions from the given number of elements in the
/// action info array.
static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction,
ArrayRef<JobActionInfo> &ActionInfo,
unsigned ElementNum) {
assert(ElementNum <= ActionInfo.size() && "Invalid number of elements.");
for (unsigned I = 0; I < ElementNum; ++I)
CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(),
ActionInfo[I].SavedOffloadAction.end());
}
/// Functions that attempt to perform the combining. They detect if that is
/// legal, and if so they update the inputs \a Inputs and the offload action
/// that were collapsed in \a CollapsedOffloadAction. A tool that deals with
/// the combined action is returned. If the combining is not legal or if the
/// tool does not exist, null is returned.
/// Currently three kinds of collapsing are supported:
/// - Assemble + Backend + Compile;
/// - Assemble + Backend;
/// - Backend + Compile.
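/// For example (illustrative): a plain `clang -c foo.c` with the integrated
/// assembler collapses Compile + Backend + Assemble into one clang job,
/// whereas -save-temps keeps the phases as separate jobs.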
const Tool *
combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 3 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA);
if (!AJ || !BJ || !CJ)
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
// When using -fembed-bitcode, it is required to have the same tool (clang)
// for both CompilerJA and BackendJA. Otherwise, combine two stages.
if (EmbedBitcode) {
const Tool *BT = TC.SelectTool(*BJ);
if (BT == T)
return nullptr;
}
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/3);
return T;
}
const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2 || !canCollapseAssembleAction())
return nullptr;
auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA);
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA);
if (!AJ || !BJ)
return nullptr;
// Get backend tool.
const Tool *T = TC.SelectTool(*BJ);
if (!T)
return nullptr;
if (!T->hasIntegratedAssembler())
return nullptr;
Inputs = BJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (ActionInfo.size() < 2)
return nullptr;
auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA);
auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA);
if (!BJ || !CJ)
return nullptr;
// Check if the initial input (to the compile job or its predecessor if one
// exists) is LLVM bitcode. In that case, no preprocessor step is required
// and we can still collapse the compile and backend jobs when we have
// -save-temps. I.e. there is no need for a separate compile job just to
// emit unoptimized bitcode.
bool InputIsBitcode = true;
for (size_t i = 1; i < ActionInfo.size(); i++)
if (ActionInfo[i].JA->getType() != types::TY_LLVM_BC &&
ActionInfo[i].JA->getType() != types::TY_LTO_BC) {
InputIsBitcode = false;
break;
}
if (!InputIsBitcode && !canCollapsePreprocessorAction())
return nullptr;
// Get compiler tool.
const Tool *T = TC.SelectTool(*CJ);
if (!T)
return nullptr;
if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode))
return nullptr;
Inputs = CJ->getInputs();
AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo,
/*NumElements=*/2);
return T;
}
/// Updates the inputs if the obtained tool supports combining with
/// preprocessor action, and the current input is indeed a preprocessor
/// action. If combining results in the collapse of offloading actions, those
/// are appended to \a CollapsedOffloadAction.
void combineWithPreprocessor(const Tool *T, ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP())
return;
// Attempt to get a preprocessor action dependence.
ActionList PreprocessJobOffloadActions;
ActionList NewInputs;
for (Action *A : Inputs) {
auto *PJ = getPrevDependentAction({A}, PreprocessJobOffloadActions);
if (!PJ || !isa<PreprocessJobAction>(PJ)) {
NewInputs.push_back(A);
continue;
}
// This is legal to combine. Append any offload action we found and replace
// the current input with the preprocessor job's inputs.
CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(),
PreprocessJobOffloadActions.end());
NewInputs.append(PJ->input_begin(), PJ->input_end());
}
Inputs = NewInputs;
}
public:
ToolSelector(const JobAction *BaseAction, const ToolChain &TC,
const Compilation &C, bool SaveTemps, bool EmbedBitcode)
: TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps),
EmbedBitcode(EmbedBitcode) {
assert(BaseAction && "Invalid base action.");
IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None;
}
/// Check if a chain of actions can be combined and return the tool that can
/// handle the combination of actions. The pointer to the current inputs \a
/// Inputs and the list of offload actions \a CollapsedOffloadActions
/// connected to collapsed actions are updated accordingly. The latter enables
/// the caller of the selector to process them afterwards instead of just
/// dropping them. If no suitable tool is found, null will be returned.
const Tool *getTool(ActionList &Inputs,
ActionList &CollapsedOffloadAction) {
//
// Get the largest chain of actions that we could combine.
//
SmallVector<JobActionInfo, 5> ActionChain(1);
ActionChain.back().JA = BaseAction;
while (ActionChain.back().JA) {
const Action *CurAction = ActionChain.back().JA;
// Grow the chain by one element.
ActionChain.resize(ActionChain.size() + 1);
JobActionInfo &AI = ActionChain.back();
// Attempt to fill it with the previous dependent action, if any.
AI.JA =
getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction);
}
// Pop the last action info as it could not be filled.
ActionChain.pop_back();
//
// Attempt to combine actions. If all combining attempts failed, just return
// the tool of the provided action. At the end we attempt to combine the
// action with any preprocessor action it may depend on.
//
const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs,
CollapsedOffloadAction);
if (!T)
T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction);
if (!T)
T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction);
if (!T) {
Inputs = BaseAction->getInputs();
T = TC.SelectTool(*BaseAction);
}
combineWithPreprocessor(T, Inputs, CollapsedOffloadAction);
return T;
}
};
}
/// Return a string that uniquely identifies the result of a job. The bound arch
/// is not necessarily represented in the toolchain's triple -- for example,
/// armv7 and armv7s both map to the same triple -- so we need both in our map.
/// Also, we need to add the offloading device kind, as the same tool chain can
/// be used for host and device for some programming models, e.g. OpenMP.
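/// The result looks like (illustrative) "nvptx64-nvidia-cuda-sm_70-cuda" for
/// a CUDA device job bound to sm_70.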
static std::string GetTriplePlusArchString(const ToolChain *TC,
StringRef BoundArch,
Action::OffloadKind OffloadKind) {
std::string TriplePlusArch = TC->getTriple().normalize();
if (!BoundArch.empty()) {
TriplePlusArch += "-";
TriplePlusArch += BoundArch;
}
TriplePlusArch += "-";
TriplePlusArch += Action::GetOffloadKindName(OffloadKind);
return TriplePlusArch;
}
InputInfo Driver::BuildJobsForAction(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
auto CachedResult = CachedResults.find(ActionTC);
if (CachedResult != CachedResults.end()) {
return CachedResult->second;
}
InputInfo Result = BuildJobsForActionNoCache(
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
CachedResults[ActionTC] = Result;
return Result;
}
InputInfo Driver::BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
InputInfoList OffloadDependencesInputInfo;
bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;
if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
// The 'Darwin' toolchain is initialized only when its arguments are
// computed. Get the default arguments for OFK_None to ensure that
// initialization is performed before processing the offload action.
// FIXME: Remove when darwin's toolchain is initialized during construction.
C.getArgsForToolChain(TC, BoundArch, Action::OFK_None);
// The offload action is expected to be used in four different situations.
//
// a) Set a toolchain/architecture/kind for a host action:
// Host Action 1 -> OffloadAction -> Host Action 2
//
// b) Set a toolchain/architecture/kind for a device action;
// Device Action 1 -> OffloadAction -> Device Action 2
//
// c) Specify a device dependence to a host action;
// Device Action 1 _
// \
// Host Action 1 ---> OffloadAction -> Host Action 2
//
// d) Specify a host dependence to a device action.
// Host Action 1 _
// \
// Device Action 1 ---> OffloadAction -> Device Action 2
//
// For a) and b), we just return the job generated for the dependence. For
// c) and d) we override the current action with the host/device dependence
// if the current toolchain is host/device and set the offload dependences
// info with the jobs obtained from the device/host dependence(s).
// If there is a single device option, just generate the job for it.
if (OA->hasSingleDeviceDependence()) {
InputInfo DevA;
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
const char *DepBoundArch) {
DevA =
BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
CachedResults, DepA->getOffloadingDeviceKind());
});
return DevA;
}
// If 'Action 2' is host, we generate jobs for the device dependences and
// override the current action with the host dependence. Otherwise, we
// generate the host dependences and override the action with the device
// dependence. The dependences can't therefore be a top-level action.
OA->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
});
A = BuildingForOffloadDevice
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
: OA->getHostDependence();
}
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
// FIXME: It would be nice to not claim this here; maybe the old scheme of
// just using Args was better?
const Arg &Input = IA->getInputArg();
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
return InputInfo(A, Name, /* _BaseInput = */ Name);
}
return InputInfo(A, &Input, /* _BaseInput = */ "");
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
const ToolChain *TC;
StringRef ArchName = BAA->getArchName();
if (!ArchName.empty())
TC = &getToolChain(C.getArgs(),
computeTargetTriple(*this, TargetTriple,
C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
MultipleArchs, LinkingOutput, CachedResults,
TargetDeviceOffloadKind);
}
ActionList Inputs = A->getInputs();
const JobAction *JA = cast<JobAction>(A);
ActionList CollapsedOffloadActions;
ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(),
embedBitcodeInObject() && !isUsingLTO());
const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions);
if (!T)
return InputInfo();
if (BuildingForOffloadDevice &&
A->getOffloadingDeviceKind() == Action::OFK_OpenMP) {
if (TC->getTriple().isAMDGCN()) {
// AMDGCN treats backend and assemble actions as no-ops because the
// linker does not support object files.
if (const BackendJobAction *BA = dyn_cast<BackendJobAction>(A)) {
return BuildJobsForAction(C, *BA->input_begin(), TC, BoundArch,
AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
}
if (const AssembleJobAction *AA = dyn_cast<AssembleJobAction>(A)) {
return BuildJobsForAction(C, *AA->input_begin(), TC, BoundArch,
AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
}
}
}
// If we've collapsed an action list that contained an OffloadAction, we
// need to build jobs for the host/device-side inputs it may have held.
for (const auto *OA : CollapsedOffloadActions)
cast<OffloadAction>(OA)->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
OffloadDependencesInputInfo.push_back(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
});
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
for (const Action *Input : Inputs) {
// Treat dsymutil and verify sub-jobs as being at the top level too; they
// shouldn't get temporary output names.
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
InputInfos.push_back(BuildJobsForAction(
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, A->getOffloadingDeviceKind()));
}
// Always use the first input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
// ... except dsymutil actions, which use their actual input as the base
// input.
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
// ... and in header module compilations, which use the module name.
if (auto *ModuleJA = dyn_cast<HeaderModulePrecompileJobAction>(JA))
BaseInput = ModuleJA->getModuleName();
// Append outputs of offload device jobs to the input list
if (!OffloadDependencesInputInfo.empty())
InputInfos.append(OffloadDependencesInputInfo.begin(),
OffloadDependencesInputInfo.end());
// Set the effective triple of the toolchain for the duration of this job.
llvm::Triple EffectiveTriple;
const ToolChain &ToolTC = T->getToolChain();
const ArgList &Args =
C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind());
if (InputInfos.size() != 1) {
EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args));
} else {
// Pass along the input type if it can be unambiguously determined.
EffectiveTriple = llvm::Triple(
ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType()));
}
RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple);
// Determine the place to write output to, if any.
InputInfo Result;
InputInfoList UnbundlingResults;
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) {
// If we have an unbundling job, we need to create results for all the
// outputs. We also update the results cache so that other actions using
// this unbundling action can get the right results.
for (auto &UI : UA->getDependentActionsInfo()) {
assert(UI.DependentOffloadKind != Action::OFK_None &&
"Unbundling with no offloading??");
// Unbundling actions are never at the top level. When we generate the
// offloading prefix, we also do that for the host file because the
// unbundling action does not change the type of the output, which can
// cause an overwrite.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
UI.DependentOffloadKind,
UI.DependentToolChain->getTriple().normalize(),
/*CreatePrefixForHost=*/true);
auto CurI = InputInfo(
UA,
GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
/*AtTopLevel=*/false,
MultipleArchs ||
UI.DependentOffloadKind == Action::OFK_HIP,
OffloadingPrefix),
BaseInput);
// Save the unbundling result.
UnbundlingResults.push_back(CurI);
// Get the unique string identifier for this dependence and cache the
// result.
StringRef Arch;
if (TargetDeviceOffloadKind == Action::OFK_HIP) {
if (UI.DependentOffloadKind == Action::OFK_Host)
Arch = StringRef();
else
Arch = UI.DependentBoundArch;
} else
Arch = BoundArch;
CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
UI.DependentOffloadKind)}] =
CurI;
}
// Now that all the results are generated, select the one that should be
// returned for the current dependent action.
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
assert(CachedResults.find(ActionTC) != CachedResults.end() &&
"Result does not exist??");
Result = CachedResults[ActionTC];
} else if (JA->getType() == types::TY_Nothing)
Result = InputInfo(A, BaseInput);
else {
// We only have to generate a prefix for the host if this is not a top-level
// action.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
/*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
!AtTopLevel);
if (isa<OffloadWrapperJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
BaseInput = FinalOutput->getValue();
else
BaseInput = getDefaultImageName();
BaseInput =
C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper");
}
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
AtTopLevel, MultipleArchs,
OffloadingPrefix),
BaseInput);
}
if (CCCPrintBindings && !CCGenDiagnostics) {
llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"'
<< " - \"" << T->getName() << "\", inputs: [";
for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) {
llvm::errs() << InputInfos[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
if (UnbundlingResults.empty())
llvm::errs() << "], output: " << Result.getAsString() << "\n";
else {
llvm::errs() << "], outputs: [";
for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) {
llvm::errs() << UnbundlingResults[i].getAsString();
if (i + 1 != e)
llvm::errs() << ", ";
}
llvm::errs() << "] \n";
}
} else {
if (UnbundlingResults.empty())
T->ConstructJob(
C, *JA, Result, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
else
T->ConstructJobMultipleOutputs(
C, *JA, UnbundlingResults, InputInfos,
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
}
return Result;
}
const char *Driver::getDefaultImageName() const {
llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
return Target.isOSWindows() ? "a.exe" : "a.out";
}
/// Create an output filename based on ArgValue, which could be either a
/// full filename, a filename without an extension, or a directory. If ArgValue
/// does not provide a filename, then use BaseName, and use the extension
/// suitable for FileType.
static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
StringRef BaseName,
types::ID FileType) {
SmallString<128> Filename = ArgValue;
if (ArgValue.empty()) {
// If the argument is empty, output to BaseName in the current dir.
Filename = BaseName;
} else if (llvm::sys::path::is_separator(Filename.back())) {
// If the argument is a directory, output to BaseName in that dir.
llvm::sys::path::append(Filename, BaseName);
}
if (!llvm::sys::path::has_extension(ArgValue)) {
// If the argument didn't provide an extension, then set it.
const char *Extension = types::getTypeTempSuffix(FileType, true);
if (FileType == types::TY_Image &&
Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) {
// The output file is a dll.
Extension = "dll";
}
llvm::sys::path::replace_extension(Filename, Extension);
}
return Args.MakeArgString(Filename.c_str());
}
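// Illustrative example (clang-cl semantics, assuming "obj" is the temp
// suffix for TY_Object): an ArgValue of "dir\" with BaseName "foo.c" and
// FileType TY_Object yields "dir\foo.obj"; an empty ArgValue yields
// "foo.obj" in the current directory.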
static bool HasPreprocessOutput(const Action &JA) {
if (isa<PreprocessJobAction>(JA))
return true;
if (isa<OffloadAction>(JA) && isa<PreprocessJobAction>(JA.getInputs()[0]))
return true;
if (isa<OffloadBundlingJobAction>(JA) &&
HasPreprocessOutput(*(JA.getInputs()[0])))
return true;
return false;
}
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput,
StringRef OrigBoundArch, bool AtTopLevel,
bool MultipleArchs,
StringRef OffloadingPrefix) const {
std::string BoundArch = OrigBoundArch.str();
#if defined(_WIN32)
// BoundArch may contain ':', which is invalid in file names on Windows;
// replace it with '@'.
std::replace(BoundArch.begin(), BoundArch.end(), ':', '@');
#endif
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
return C.addResultFile(FinalOutput->getValue(), &JA);
}
// For /P, preprocess to file named after BaseInput.
if (C.getArgs().hasArg(options::OPT__SLASH_P)) {
assert(AtTopLevel && isa<PreprocessJobAction>(JA));
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef NameArg;
if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
NameArg = A->getValue();
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
&JA);
}
// Default to writing to stdout?
if (AtTopLevel && !CCGenDiagnostics && HasPreprocessOutput(JA)) {
return "-";
}
if (JA.getType() == types::TY_ModuleFile &&
C.getArgs().getLastArg(options::OPT_module_file_info)) {
return "-";
}
// Is this the assembly listing for /FA?
if (JA.getType() == types::TY_PP_Asm &&
(C.getArgs().hasArg(options::OPT__SLASH_FA) ||
C.getArgs().hasArg(options::OPT__SLASH_Fa))) {
// Use /Fa and the input filename to determine the asm file name.
StringRef BaseName = llvm::sys::path::filename(BaseInput);
StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
return C.addResultFile(
MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
&JA);
}
// Output to a temporary file?
if ((!AtTopLevel && !isSaveTempsEnabled() &&
!C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
CCGenDiagnostics) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
SmallString<128> TmpName;
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir);
if (CCGenDiagnostics && A) {
SmallString<128> CrashDirectory(A->getValue());
if (!getVFS().exists(CrashDirectory))
llvm::sys::fs::create_directories(CrashDirectory);
llvm::sys::path::append(CrashDirectory, Split.first);
const char *Middle = Suffix ? "-%%%%%%." : "-%%%%%%";
std::error_code EC = llvm::sys::fs::createUniqueFile(
CrashDirectory + Middle + Suffix, TmpName);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
} else {
TmpName = GetTemporaryPath(Split.first, Suffix);
}
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
SmallString<128> BasePath(BaseInput);
SmallString<128> ExternalPath("");
StringRef BaseName;
// Dsymutil actions should use the full path.
if (isa<DsymutilJobAction>(JA) && C.getArgs().hasArg(options::OPT_dsym_dir)) {
ExternalPath += C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue();
// We use posix style here because the tests (specifically
// darwin-dsymutil.c) demonstrate that posix-style paths are acceptable
// even on Windows; if we don't, the corresponding test fails.
llvm::sys::path::append(ExternalPath, llvm::sys::path::Style::posix,
llvm::sys::path::filename(BasePath));
BaseName = ExternalPath;
} else if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
BaseName = BasePath;
else
BaseName = llvm::sys::path::filename(BasePath);
// Determine what the derived output name should be.
const char *NamedOutput;
if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&
C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
// The /Fo or /o flag decides the object filename.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
} else if (JA.getType() == types::TY_Image &&
C.getArgs().hasArg(options::OPT__SLASH_Fe,
options::OPT__SLASH_o)) {
// The /Fe or /o flag names the linked file.
StringRef Val =
C.getArgs()
.getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
->getValue();
NamedOutput =
MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
} else if (JA.getType() == types::TY_Image) {
if (IsCLMode()) {
// clang-cl uses BaseName for the executable name.
NamedOutput =
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
} else {
SmallString<128> Output(getDefaultImageName());
// HIP image for device compilation with -fno-gpu-rdc is per compilation
// unit.
bool IsHIPNoRDC = JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
!C.getArgs().hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, false);
if (IsHIPNoRDC) {
Output = BaseName;
llvm::sys::path::replace_extension(Output, "");
}
Output += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Output += "-";
Output.append(BoundArch);
}
if (IsHIPNoRDC)
Output += ".out";
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
}
} else if (JA.getType() == types::TY_PCH && IsCLMode()) {
NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
std::string::size_type End = std::string::npos;
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
Suffixed += OffloadingPrefix;
if (MultipleArchs && !BoundArch.empty()) {
Suffixed += "-";
Suffixed.append(BoundArch);
}
// When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for
// the unoptimized bitcode so that it does not get overwritten by the ".bc"
// optimized bitcode output.
auto IsHIPRDCInCompilePhase = [](const JobAction &JA,
const llvm::opt::DerivedArgList &Args) {
// The relocatable compilation in HIP implies -emit-llvm. Similarly, use a
// ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile
// phase).
return isa<CompileJobAction>(JA) &&
JA.getOffloadingDeviceKind() == Action::OFK_HIP &&
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false);
};
if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC &&
(C.getArgs().hasArg(options::OPT_emit_llvm) ||
IsHIPRDCInCompilePhase(JA, C.getArgs())))
Suffixed += ".tmp";
Suffixed += '.';
Suffixed += Suffix;
NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str());
}
// Prepend object file path if -save-temps=obj
if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) &&
JA.getType() != types::TY_PCH) {
Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o);
SmallString<128> TempPath(FinalOutput->getValue());
llvm::sys::path::remove_filename(TempPath);
StringRef OutputFileName = llvm::sys::path::filename(NamedOutput);
llvm::sys::path::append(TempPath, OutputFileName);
NamedOutput = C.getArgs().MakeArgString(TempPath.c_str());
}
// If we're saving temps and the temp file conflicts with the input file,
// then avoid overwriting input file.
if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) {
bool SameFile = false;
SmallString<256> Result;
llvm::sys::fs::current_path(Result);
llvm::sys::path::append(Result, BaseName);
llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
// Must share the same path to conflict.
if (SameFile) {
StringRef Name = llvm::sys::path::filename(BaseInput);
std::pair<StringRef, StringRef> Split = Name.split('.');
std::string TmpName = GetTemporaryPath(
Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
}
// As an annoying special case, PCH generation doesn't strip the pathname.
if (JA.getType() == types::TY_PCH && !IsCLMode()) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
else
llvm::sys::path::append(BasePath, NamedOutput);
return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA);
} else {
return C.addResultFile(NamedOutput, &JA);
}
}
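// Illustrative sketch of the common cases (assuming the usual suffix
// table): compiling "foo.c" with -c and no -o derives "foo.o"; with
// -save-temps=obj and -o out/foo.o, intermediate outputs are additionally
// placed next to the final object in "out/"; at the top level an explicit
// -o always wins (except for dsymutil and verify jobs).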
std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
// Search for Name in a list of paths.
auto SearchPaths = [&](const llvm::SmallVectorImpl<std::string> &P)
-> llvm::Optional<std::string> {
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for file paths.
for (const auto &Dir : P) {
if (Dir.empty())
continue;
SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return std::string(P);
}
return None;
};
if (auto P = SearchPaths(PrefixDirs))
return *P;
SmallString<128> R(ResourceDir);
llvm::sys::path::append(R, Name);
if (llvm::sys::fs::exists(Twine(R)))
return std::string(R.str());
SmallString<128> P(TC.getCompilerRTPath());
llvm::sys::path::append(P, Name);
if (llvm::sys::fs::exists(Twine(P)))
return std::string(P.str());
SmallString<128> D(Dir);
llvm::sys::path::append(D, "..", Name);
if (llvm::sys::fs::exists(Twine(D)))
return std::string(D.str());
if (auto P = SearchPaths(TC.getLibraryPaths()))
return *P;
if (auto P = SearchPaths(TC.getFilePaths()))
return *P;
return std::string(Name);
}
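// Illustrative example: GetFilePath("libclang_rt.builtins.a", TC) walks the
// -B prefix dirs (a leading '=' is rewritten to the sysroot), then the
// resource dir, the compiler-rt path, <driver dir>/.., and finally the
// toolchain library and file paths, returning the bare name if nothing is
// found. The file name here is only an example input.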
void Driver::generatePrefixedToolNames(
StringRef Tool, const ToolChain &TC,
SmallVectorImpl<std::string> &Names) const {
// FIXME: Needs a better variable than TargetTriple
Names.emplace_back((TargetTriple + "-" + Tool).str());
Names.emplace_back(Tool);
}
static bool ScanDirForExecutable(SmallString<128> &Dir, StringRef Name) {
llvm::sys::path::append(Dir, Name);
if (llvm::sys::fs::can_execute(Twine(Dir)))
return true;
llvm::sys::path::remove_filename(Dir);
return false;
}
std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {
SmallVector<std::string, 2> TargetSpecificExecutables;
generatePrefixedToolNames(Name, TC, TargetSpecificExecutables);
// Respect a limited subset of the '-Bprefix' functionality in GCC by
// attempting to use this prefix when looking for program paths.
for (const auto &PrefixDir : PrefixDirs) {
if (llvm::sys::fs::is_directory(PrefixDir)) {
SmallString<128> P(PrefixDir);
if (ScanDirForExecutable(P, Name))
return std::string(P.str());
} else {
SmallString<128> P((PrefixDir + Name).str());
if (llvm::sys::fs::can_execute(Twine(P)))
return std::string(P.str());
}
}
const ToolChain::path_list &List = TC.getProgramPaths();
for (const auto &TargetSpecificExecutable : TargetSpecificExecutables) {
// For each possible name of the tool, look for it in the
// program paths first, then in PATH.
// Higher-priority names come first, so a higher-priority name found in
// PATH is used instead of a lower-priority name found in the program
// paths, e.g. <triple>-gcc found in PATH is used instead of plain gcc
// found in the program paths.
for (const auto &Path : List) {
SmallString<128> P(Path);
if (ScanDirForExecutable(P, TargetSpecificExecutable))
return std::string(P.str());
}
// Fall back to the path
if (llvm::ErrorOr<std::string> P =
llvm::sys::findProgramByName(TargetSpecificExecutable))
return *P;
}
return std::string(Name);
}
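// Illustrative example (the triple is only an example): looking up "ld"
// first checks the -B prefix dirs for plain "ld"; it then tries
// "x86_64-unknown-freebsd-ld" and finally plain "ld" in the toolchain
// program paths and in PATH, returning the bare name if nothing is found.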
std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return std::string(Path.str());
}
std::string Driver::GetTemporaryDirectory(StringRef Prefix) const {
SmallString<128> Path;
std::error_code EC = llvm::sys::fs::createUniqueDirectory(Prefix, Path);
if (EC) {
Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return "";
}
return std::string(Path.str());
}
std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
SmallString<128> Output;
if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
// FIXME: If anybody needs it, implement this obscure rule:
// "If you specify a directory without a file name, the default file name
// is VCx0.pch., where x is the major version of Visual C++ in use."
Output = FpArg->getValue();
// "If you do not specify an extension as part of the path name, an
// extension of .pch is assumed. "
if (!llvm::sys::path::has_extension(Output))
Output += ".pch";
} else {
if (Arg *YcArg = C.getArgs().getLastArg(options::OPT__SLASH_Yc))
Output = YcArg->getValue();
if (Output.empty())
Output = BaseName;
llvm::sys::path::replace_extension(Output, ".pch");
}
return std::string(Output.str());
}
const ToolChain &Driver::getToolChain(const ArgList &Args,
const llvm::Triple &Target) const {
auto &TC = ToolChains[Target.str()];
if (!TC) {
switch (Target.getOS()) {
case llvm::Triple::AIX:
TC = std::make_unique<toolchains::AIX>(*this, Target, Args);
break;
case llvm::Triple::Haiku:
TC = std::make_unique<toolchains::Haiku>(*this, Target, Args);
break;
case llvm::Triple::Ananas:
TC = std::make_unique<toolchains::Ananas>(*this, Target, Args);
break;
case llvm::Triple::CloudABI:
TC = std::make_unique<toolchains::CloudABI>(*this, Target, Args);
break;
case llvm::Triple::Darwin:
case llvm::Triple::MacOSX:
case llvm::Triple::IOS:
case llvm::Triple::TvOS:
case llvm::Triple::WatchOS:
TC = std::make_unique<toolchains::DarwinClang>(*this, Target, Args);
break;
case llvm::Triple::DragonFly:
TC = std::make_unique<toolchains::DragonFly>(*this, Target, Args);
break;
case llvm::Triple::OpenBSD:
TC = std::make_unique<toolchains::OpenBSD>(*this, Target, Args);
break;
case llvm::Triple::NetBSD:
TC = std::make_unique<toolchains::NetBSD>(*this, Target, Args);
break;
case llvm::Triple::FreeBSD:
TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args);
break;
case llvm::Triple::Minix:
TC = std::make_unique<toolchains::Minix>(*this, Target, Args);
break;
case llvm::Triple::Linux:
case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
!Target.hasEnvironment())
TC = std::make_unique<toolchains::MipsLLVMToolChain>(*this, Target,
Args);
else if (Target.isPPC())
TC = std::make_unique<toolchains::PPCLinuxToolChain>(*this, Target,
Args);
else if (Target.getArch() == llvm::Triple::ve)
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Linux>(*this, Target, Args);
break;
case llvm::Triple::NaCl:
TC = std::make_unique<toolchains::NaClToolChain>(*this, Target, Args);
break;
case llvm::Triple::Fuchsia:
TC = std::make_unique<toolchains::Fuchsia>(*this, Target, Args);
break;
case llvm::Triple::Solaris:
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
break;
case llvm::Triple::AMDPAL:
case llvm::Triple::Mesa3D:
TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
break;
case llvm::Triple::Win32:
switch (Target.getEnvironment()) {
default:
if (Target.isOSBinFormatELF())
TC = std::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = std::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
break;
case llvm::Triple::GNU:
TC = std::make_unique<toolchains::MinGW>(*this, Target, Args);
break;
case llvm::Triple::Itanium:
TC = std::make_unique<toolchains::CrossWindowsToolChain>(*this, Target,
Args);
break;
case llvm::Triple::MSVC:
case llvm::Triple::UnknownEnvironment:
if (Args.getLastArgValue(options::OPT_fuse_ld_EQ)
.startswith_insensitive("bfd"))
TC = std::make_unique<toolchains::CrossWindowsToolChain>(
*this, Target, Args);
else
TC =
std::make_unique<toolchains::MSVCToolChain>(*this, Target, Args);
break;
}
break;
case llvm::Triple::PS4:
TC = std::make_unique<toolchains::PS4CPU>(*this, Target, Args);
break;
case llvm::Triple::Contiki:
TC = std::make_unique<toolchains::Contiki>(*this, Target, Args);
break;
case llvm::Triple::Hurd:
TC = std::make_unique<toolchains::Hurd>(*this, Target, Args);
break;
case llvm::Triple::ZOS:
TC = std::make_unique<toolchains::ZOS>(*this, Target, Args);
break;
default:
// Of these targets, Hexagon is the only one that might have
// an OS of Linux, in which case it was already handled above.
switch (Target.getArch()) {
case llvm::Triple::tce:
TC = std::make_unique<toolchains::TCEToolChain>(*this, Target, Args);
break;
case llvm::Triple::tcele:
TC = std::make_unique<toolchains::TCELEToolChain>(*this, Target, Args);
break;
case llvm::Triple::hexagon:
TC = std::make_unique<toolchains::HexagonToolChain>(*this, Target,
Args);
break;
case llvm::Triple::lanai:
TC = std::make_unique<toolchains::LanaiToolChain>(*this, Target, Args);
break;
case llvm::Triple::xcore:
TC = std::make_unique<toolchains::XCoreToolChain>(*this, Target, Args);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
TC = std::make_unique<toolchains::WebAssembly>(*this, Target, Args);
break;
case llvm::Triple::avr:
TC = std::make_unique<toolchains::AVRToolChain>(*this, Target, Args);
break;
case llvm::Triple::msp430:
TC =
std::make_unique<toolchains::MSP430ToolChain>(*this, Target, Args);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args))
TC =
std::make_unique<toolchains::RISCVToolChain>(*this, Target, Args);
else
TC = std::make_unique<toolchains::BareMetal>(*this, Target, Args);
break;
case llvm::Triple::ve:
TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
break;
default:
if (Target.getVendor() == llvm::Triple::Myriad)
TC = std::make_unique<toolchains::MyriadToolChain>(*this, Target,
Args);
else if (toolchains::BareMetal::handlesTarget(Target))
TC = std::make_unique<toolchains::BareMetal>(*this, Target, Args);
else if (Target.isOSBinFormatELF())
TC = std::make_unique<toolchains::Generic_ELF>(*this, Target, Args);
else if (Target.isOSBinFormatMachO())
TC = std::make_unique<toolchains::MachO>(*this, Target, Args);
else
TC = std::make_unique<toolchains::Generic_GCC>(*this, Target, Args);
}
}
}
// Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA
// compiles always need two toolchains, the CUDA toolchain and the host
// toolchain. So the only valid way to create a CUDA toolchain is via
// CreateOffloadingDeviceToolChains.
return *TC;
}
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type clang understands.
if (JA.size() != 1 ||
!types::isAcceptedByClang((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action clang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
!isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type flang understands.
if (JA.size() != 1 ||
!types::isFortran((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action flang understands.
if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
return false;
return true;
}
bool Driver::ShouldEmitStaticLibrary(const ArgList &Args) const {
// Only emit static library if the flag is set explicitly.
if (Args.hasArg(options::OPT_emit_static_lib))
return true;
return false;
}
/// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the
/// grouped values as integers. Numbers which are not provided are set to 0.
///
/// \return True if the entire string was parsed (9.2), or all groups were
/// parsed (10.3.5extrastuff).
bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor,
unsigned &Micro, bool &HadExtra) {
HadExtra = false;
Major = Minor = Micro = 0;
if (Str.empty())
return false;
if (Str.consumeInteger(10, Major))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Minor))
return false;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
if (Str.consumeInteger(10, Micro))
return false;
if (!Str.empty())
HadExtra = true;
return true;
}
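// Illustrative usage (comment sketch):
//   unsigned Major, Minor, Micro; bool HadExtra;
//   Driver::GetReleaseVersion("10.3.5extra", Major, Minor, Micro, HadExtra);
//   // -> returns true with Major=10, Minor=3, Micro=5, HadExtra=true;
//   // "9.2" also returns true, with Micro left at 0 and HadExtra=false.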
/// Parse digits from a string \p Str and fill \p Digits with
/// the parsed numbers. This method assumes that the max number of
/// digits to look for is equal to Digits.size().
///
/// \return True if the entire string was parsed and there are
/// no extra characters remaining at the end.
bool Driver::GetReleaseVersion(StringRef Str,
MutableArrayRef<unsigned> Digits) {
if (Str.empty())
return false;
unsigned CurDigit = 0;
while (CurDigit < Digits.size()) {
unsigned Digit;
if (Str.consumeInteger(10, Digit))
return false;
Digits[CurDigit] = Digit;
if (Str.empty())
return true;
if (Str[0] != '.')
return false;
Str = Str.drop_front(1);
CurDigit++;
}
// More digits than requested, bail out...
return false;
}
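// Illustrative usage (comment sketch), with a two-element buffer:
//   unsigned Parts[2] = {0, 0};
//   Driver::GetReleaseVersion("4.2", Parts);   // true, Parts = {4, 2}
//   Driver::GetReleaseVersion("4.2.1", Parts); // false: more digits than requested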
std::pair<unsigned, unsigned>
Driver::getIncludeExcludeOptionFlagMasks(bool IsClCompatMode) const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
if (IsClCompatMode) {
// Include CL and Core options.
IncludedFlagsBitmask |= options::CLOption;
IncludedFlagsBitmask |= options::CoreOption;
} else {
ExcludedFlagsBitmask |= options::CLOption;
}
return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
}
bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
}
bool clang::driver::willEmitRemarks(const ArgList &Args) {
// -fsave-optimization-record enables it.
if (Args.hasFlag(options::OPT_fsave_optimization_record,
options::OPT_fno_save_optimization_record, false))
return true;
// -fsave-optimization-record=<format> enables it as well.
if (Args.hasFlag(options::OPT_fsave_optimization_record_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
// -foptimization-record-file alone enables it too.
if (Args.hasFlag(options::OPT_foptimization_record_file_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
// -foptimization-record-passes alone enables it too.
if (Args.hasFlag(options::OPT_foptimization_record_passes_EQ,
options::OPT_fno_save_optimization_record, false))
return true;
return false;
}
llvm::StringRef clang::driver::getDriverMode(StringRef ProgName,
ArrayRef<const char *> Args) {
static const std::string OptName =
getDriverOptTable().getOption(options::OPT_driver_mode).getPrefixedName();
llvm::StringRef Opt;
for (StringRef Arg : Args) {
if (!Arg.startswith(OptName))
continue;
Opt = Arg;
- break;
}
if (Opt.empty())
Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode;
return Opt.consume_front(OptName) ? Opt : "";
}
bool driver::IsClangCL(StringRef DriverMode) { return DriverMode.equals("cl"); }
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
index d63c5e12c4af..4a7413112b55 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -1,895 +1,930 @@
//===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>
#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
// Look for a sub-directory under the ROCm candidate path whose name starts
// with PackageName. If exactly one matching sub-directory is found, append
// it to the candidate path and return the resulting full package path. If
// there is no matching sub-directory, or more than one, diagnose the
// situation and return an empty string.
llvm::SmallString<0>
RocmInstallationDetector::findSPACKPackage(const Candidate &Cand,
StringRef PackageName) {
if (!Cand.isSPACK())
return {};
std::error_code EC;
std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str();
llvm::SmallVector<llvm::SmallString<0>> SubDirs;
for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Cand.Path, EC),
FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
llvm::StringRef FileName = llvm::sys::path::filename(File->path());
if (FileName.startswith(Prefix)) {
SubDirs.push_back(FileName);
if (SubDirs.size() > 1)
break;
}
}
if (SubDirs.size() == 1) {
auto PackagePath = Cand.Path;
llvm::sys::path::append(PackagePath, SubDirs[0]);
return PackagePath;
}
if (SubDirs.size() == 0 && Verbose) {
llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path
<< '\n';
return {};
}
if (SubDirs.size() > 1 && Verbose) {
llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path
<< " due to multiple installations for the same version\n";
}
return {};
}
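// Illustrative example: with PackageName "hip" and a candidate whose
// SPACKReleaseStr is "4.0.0", this looks for exactly one sub-directory of
// Cand.Path whose name starts with "hip-4.0.0" (e.g. "hip-4.0.0-abc123",
// where the hash is made up for illustration) and returns its full path.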
void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
assert(!Path.empty());
const StringRef Suffix(".bc");
const StringRef Suffix2(".amdgcn.bc");
std::error_code EC;
for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef FilePath = LI->path();
StringRef FileName = llvm::sys::path::filename(FilePath);
if (!FileName.endswith(Suffix))
continue;
StringRef BaseName;
if (FileName.endswith(Suffix2))
BaseName = FileName.drop_back(Suffix2.size());
else if (FileName.endswith(Suffix))
BaseName = FileName.drop_back(Suffix.size());
if (BaseName == "ocml") {
OCML = FilePath;
} else if (BaseName == "ockl") {
OCKL = FilePath;
} else if (BaseName == "opencl") {
OpenCL = FilePath;
} else if (BaseName == "hip") {
HIP = FilePath;
} else if (BaseName == "asanrtl") {
AsanRTL = FilePath;
} else if (BaseName == "oclc_finite_only_off") {
FiniteOnly.Off = FilePath;
} else if (BaseName == "oclc_finite_only_on") {
FiniteOnly.On = FilePath;
} else if (BaseName == "oclc_daz_opt_on") {
DenormalsAreZero.On = FilePath;
} else if (BaseName == "oclc_daz_opt_off") {
DenormalsAreZero.Off = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
CorrectlyRoundedSqrt.On = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
CorrectlyRoundedSqrt.Off = FilePath;
} else if (BaseName == "oclc_unsafe_math_on") {
UnsafeMath.On = FilePath;
} else if (BaseName == "oclc_unsafe_math_off") {
UnsafeMath.Off = FilePath;
} else if (BaseName == "oclc_wavefrontsize64_on") {
WavefrontSize64.On = FilePath;
} else if (BaseName == "oclc_wavefrontsize64_off") {
WavefrontSize64.Off = FilePath;
} else {
// Process all bitcode filenames that look like
// oclc_isa_version_XXX.amdgcn.bc
const StringRef DeviceLibPrefix = "oclc_isa_version_";
if (!BaseName.startswith(DeviceLibPrefix))
continue;
StringRef IsaVersionNumber =
BaseName.drop_front(DeviceLibPrefix.size());
llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
SmallString<8> Tmp;
LibDeviceMap.insert(
std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
}
}
}
// Parse and extract version numbers from `.hipVersion`. Return `true` if
// the parsing fails.
bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) {
SmallVector<StringRef, 4> VersionParts;
V.split(VersionParts, '\n');
unsigned Major = ~0U;
unsigned Minor = ~0U;
for (auto Part : VersionParts) {
auto Splits = Part.rtrim().split('=');
if (Splits.first == "HIP_VERSION_MAJOR") {
if (Splits.second.getAsInteger(0, Major))
return true;
} else if (Splits.first == "HIP_VERSION_MINOR") {
if (Splits.second.getAsInteger(0, Minor))
return true;
} else if (Splits.first == "HIP_VERSION_PATCH")
VersionPatch = Splits.second.str();
}
if (Major == ~0U || Minor == ~0U)
return true;
VersionMajorMinor = llvm::VersionTuple(Major, Minor);
DetectedVersion =
(Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
return false;
}
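// Illustrative example of a .hipVersion file this accepts:
//   HIP_VERSION_MAJOR=4
//   HIP_VERSION_MINOR=2
//   HIP_VERSION_PATCH=0
// yielding VersionMajorMinor = (4, 2) and DetectedVersion = "4.2.0".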
/// \returns a list of candidate directories for ROCm installation, which is
/// cached and populated only once.
const SmallVectorImpl<RocmInstallationDetector::Candidate> &
RocmInstallationDetector::getInstallationPathCandidates() {
// Return the cached candidate list if it has already been populated.
if (!ROCmSearchDirs.empty())
return ROCmSearchDirs;
auto DoPrintROCmSearchDirs = [&]() {
if (PrintROCmSearchDirs)
for (auto Cand : ROCmSearchDirs) {
llvm::errs() << "ROCm installation search path";
if (Cand.isSPACK())
llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")";
llvm::errs() << ": " << Cand.Path << '\n';
}
};
// For the candidate specified by --rocm-path we do not do a strict check,
// i.e., we do not check for the existence of the HIP version file and the
// device library files.
if (!RocmPathArg.empty()) {
ROCmSearchDirs.emplace_back(RocmPathArg.str());
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
} else if (const char *RocmPathEnv = ::getenv("ROCM_PATH")) {
if (!StringRef(RocmPathEnv).empty()) {
ROCmSearchDirs.emplace_back(RocmPathEnv);
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
}
}
// Try to find relative to the compiler binary.
const char *InstallDir = D.getInstalledDir();
// Check both a normal Unix prefix position of the clang binary, as well as
// the Windows-esque layout the ROCm packages use with the host architecture
// subdirectory of bin.
auto DeduceROCmPath = [](StringRef ClangPath) {
// Strip off directory (usually bin)
StringRef ParentDir = llvm::sys::path::parent_path(ClangPath);
StringRef ParentName = llvm::sys::path::filename(ParentDir);
// Some builds use bin/{host arch}, so go up again.
if (ParentName == "bin") {
ParentDir = llvm::sys::path::parent_path(ParentDir);
ParentName = llvm::sys::path::filename(ParentDir);
}
// Detect ROCm packages built with SPACK.
// clang is installed in the
// <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory.
// We only consider the parent directory of the llvm-amdgpu package as a
// ROCm installation candidate for SPACK.
if (ParentName.startswith("llvm-amdgpu-")) {
auto SPACKPostfix =
ParentName.drop_front(strlen("llvm-amdgpu-")).split('-');
auto SPACKReleaseStr = SPACKPostfix.first;
if (!SPACKReleaseStr.empty()) {
ParentDir = llvm::sys::path::parent_path(ParentDir);
return Candidate(ParentDir.str(), /*StrictChecking=*/true,
SPACKReleaseStr);
}
}
// Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
// Some versions of the aomp package install to /opt/rocm/aomp/bin
if (ParentName == "llvm" || ParentName.startswith("aomp"))
ParentDir = llvm::sys::path::parent_path(ParentDir);
return Candidate(ParentDir.str(), /*StrictChecking=*/true);
};
// Deduce the ROCm path from the path used to invoke clang, without resolving
// clang's own symbolic link.
ROCmSearchDirs.emplace_back(DeduceROCmPath(InstallDir));
// Deduce the ROCm path from the real path of the invoked clang, resolving
// clang's own symbolic link.
llvm::SmallString<256> RealClangPath;
llvm::sys::fs::real_path(D.getClangProgramPath(), RealClangPath);
auto ParentPath = llvm::sys::path::parent_path(RealClangPath);
if (ParentPath != InstallDir)
ROCmSearchDirs.emplace_back(DeduceROCmPath(ParentPath));
// Device library may be installed in clang or resource directory.
auto ClangRoot = llvm::sys::path::parent_path(InstallDir);
auto RealClangRoot = llvm::sys::path::parent_path(ParentPath);
ROCmSearchDirs.emplace_back(ClangRoot.str(), /*StrictChecking=*/true);
if (RealClangRoot != ClangRoot)
ROCmSearchDirs.emplace_back(RealClangRoot.str(), /*StrictChecking=*/true);
ROCmSearchDirs.emplace_back(D.ResourceDir,
/*StrictChecking=*/true);
ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/rocm",
/*StrictChecking=*/true);
// Find the latest /opt/rocm-{release} directory.
std::error_code EC;
std::string LatestROCm;
llvm::VersionTuple LatestVer;
// Get ROCm version from ROCm directory name.
auto GetROCmVersion = [](StringRef DirName) {
llvm::VersionTuple V;
std::string VerStr = DirName.drop_front(strlen("rocm-")).str();
// The ROCm directory name follows the format of
// rocm-{major}.{minor}.{subMinor}[-{build}]
std::replace(VerStr.begin(), VerStr.end(), '-', '.');
V.tryParse(VerStr);
return V;
};
for (llvm::vfs::directory_iterator
File = D.getVFS().dir_begin(D.SysRoot + "/opt", EC),
FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
llvm::StringRef FileName = llvm::sys::path::filename(File->path());
if (!FileName.startswith("rocm-"))
continue;
if (LatestROCm.empty()) {
LatestROCm = FileName.str();
LatestVer = GetROCmVersion(LatestROCm);
continue;
}
auto Ver = GetROCmVersion(FileName);
if (LatestVer < Ver) {
LatestROCm = FileName.str();
LatestVer = Ver;
}
}
if (!LatestROCm.empty())
ROCmSearchDirs.emplace_back(D.SysRoot + "/opt/" + LatestROCm,
/*StrictChecking=*/true);
DoPrintROCmSearchDirs();
return ROCmSearchDirs;
}
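// Illustrative example of the version comparison above: a directory named
// "rocm-4.3.0" parses as version 4.3.0, while "rocm-4.3.0-1234" becomes
// "4.3.0.1234" after the '-' to '.' replacement, so a later build of the
// same release still compares as newer.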
RocmInstallationDetector::RocmInstallationDetector(
const Driver &D, const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
: D(D) {
Verbose = Args.hasArg(options::OPT_v);
RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
PrintROCmSearchDirs =
Args.hasArg(clang::driver::options::OPT_print_rocm_search_dirs);
RocmDeviceLibPathArg =
Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
HIPVersionArg = A->getValue();
unsigned Major = ~0U;
unsigned Minor = ~0U;
SmallVector<StringRef, 3> Parts;
HIPVersionArg.split(Parts, '.');
if (Parts.size())
Parts[0].getAsInteger(0, Major);
if (Parts.size() > 1)
Parts[1].getAsInteger(0, Minor);
if (Parts.size() > 2)
VersionPatch = Parts[2].str();
if (VersionPatch.empty())
VersionPatch = "0";
if (Major != ~0U && Minor == ~0U)
Minor = 0;
if (Major == ~0U || Minor == ~0U)
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << HIPVersionArg;
VersionMajorMinor = llvm::VersionTuple(Major, Minor);
DetectedVersion =
(Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
} else {
VersionPatch = DefaultVersionPatch;
VersionMajorMinor =
llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
DetectedVersion = (Twine(DefaultVersionMajor) + "." +
Twine(DefaultVersionMinor) + "." + VersionPatch)
.str();
}
if (DetectHIPRuntime)
detectHIPRuntime();
if (DetectDeviceLib)
detectDeviceLibrary();
}
void RocmInstallationDetector::detectDeviceLibrary() {
assert(LibDevicePath.empty());
if (!RocmDeviceLibPathArg.empty())
LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
LibDevicePath = LibPathEnv;
auto &FS = D.getVFS();
if (!LibDevicePath.empty()) {
// Maintain compatibility with the HIP flag/envvar pointing directly at the
// bitcode library directory. This points directly at the library path
// instead of the rocm root installation.
if (!FS.exists(LibDevicePath))
return;
scanLibDevicePath(LibDevicePath);
HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
return;
}
// The install path situation in old versions of ROCm is a real mess: they
// use a different install layout. Multiple copies of the device libraries
// exist for each frontend project, and differ depending on which build
// system produced the packages. Standalone OpenCL builds also have a
// different directory structure from the ROCm OpenCL package.
auto &ROCmDirs = getInstallationPathCandidates();
for (const auto &Candidate : ROCmDirs) {
auto CandidatePath = Candidate.Path;
// Check device library exists at the given path.
auto CheckDeviceLib = [&](StringRef Path) {
bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
if (CheckLibDevice && !FS.exists(Path))
return false;
scanLibDevicePath(Path);
if (!NoBuiltinLibs) {
// Check that the required non-target libraries are all available.
if (!allGenericLibsValid())
return false;
// Check that we have found at least one libdevice that we can link in
// if -nobuiltinlib hasn't been specified.
if (LibDeviceMap.empty())
return false;
}
return true;
};
// The possible structures are:
// - ${ROCM_ROOT}/amdgcn/bitcode/*
// - ${ROCM_ROOT}/lib/*
// - ${ROCM_ROOT}/lib/bitcode/*
// so try to detect these layouts.
static constexpr std::array<const char *, 2> SubDirsList[] = {
{"amdgcn", "bitcode"},
{"lib", ""},
{"lib", "bitcode"},
};
// Make a path by appending sub-directories to InstallPath.
auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
auto Path = CandidatePath;
for (auto SubDir : SubDirs)
llvm::sys::path::append(Path, SubDir);
return Path;
};
for (auto SubDirs : SubDirsList) {
LibDevicePath = MakePath(SubDirs);
HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
if (HasDeviceLibrary)
return;
}
}
}
void RocmInstallationDetector::detectHIPRuntime() {
SmallVector<Candidate, 4> HIPSearchDirs;
if (!HIPPathArg.empty())
HIPSearchDirs.emplace_back(HIPPathArg.str(), /*StrictChecking=*/true);
else
HIPSearchDirs.append(getInstallationPathCandidates());
auto &FS = D.getVFS();
for (const auto &Candidate : HIPSearchDirs) {
InstallPath = Candidate.Path;
if (InstallPath.empty() || !FS.exists(InstallPath))
continue;
// HIP runtime built by SPACK is installed to
// <rocm_root>/hip-<rocm_release_string>-<hash> directory.
auto SPACKPath = findSPACKPackage(Candidate, "hip");
InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath;
BinPath = InstallPath;
llvm::sys::path::append(BinPath, "bin");
IncludePath = InstallPath;
llvm::sys::path::append(IncludePath, "include");
LibPath = InstallPath;
llvm::sys::path::append(LibPath, "lib");
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
FS.getBufferForFile(BinPath + "/.hipVersion");
if (!VersionFile && Candidate.StrictChecking)
continue;
if (HIPVersionArg.empty() && VersionFile)
if (parseHIPVersionFile((*VersionFile)->getBuffer()))
continue;
HasHIPRuntime = true;
return;
}
HasHIPRuntime = false;
}
void RocmInstallationDetector::print(raw_ostream &OS) const {
if (hasHIPRuntime())
OS << "Found HIP installation: " << InstallPath << ", version "
<< DetectedVersion << '\n';
}
void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// The HIP headers include standard library wrapper headers under clang's
// cuda_wrappers directory. These wrapper headers include_next the standard
// C++ headers, and the libc++ headers in turn include_next other clang
// headers. The include paths therefore have to follow this order:
// - wrapper include path
// - standard C++ include path
// - other clang include path
// Since standard C++ and other clang include paths are added in other
// places after this function, here we only need to make sure wrapper
// include path is added.
//
// ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
// a workaround.
SmallString<128> P(D.ResourceDir);
if (UsesRuntimeWrapper)
llvm::sys::path::append(P, "include", "cuda_wrappers");
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(P));
}
if (DriverArgs.hasArg(options::OPT_nogpuinc))
return;
if (!hasHIPRuntime()) {
D.Diag(diag::err_drv_no_hip_runtime);
return;
}
CC1Args.push_back("-internal-isystem");
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
if (UsesRuntimeWrapper)
CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
}
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
std::string Linker = getToolChain().GetProgramPath(getShortName());
ArgStringList CmdArgs;
addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
CmdArgs.push_back("-shared");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
CmdArgs, Inputs, Output));
}
void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features) {
// Add target ID features to the -target-feature options. No diagnostics
// should be emitted here since an invalid target ID is diagnosed elsewhere.
StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
if (!TargetID.empty()) {
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
if (OptionalGpuArch) {
StringRef GpuArch = OptionalGpuArch.getValue();
// Iterate through all possible target ID features for the given GPU.
// If it is mapped to true, add +feature.
// If it is mapped to false, add -feature.
// If it is not in the map (default), do not add it
for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
auto Pos = FeatureMap.find(Feature);
if (Pos == FeatureMap.end())
continue;
Features.push_back(Args.MakeArgStringRef(
(Twine(Pos->second ? "+" : "-") + Feature).str()));
}
}
}
if (Args.hasFlag(options::OPT_mwavefrontsize64,
options::OPT_mno_wavefrontsize64, false))
Features.push_back("+wavefrontsize64");
handleTargetFeaturesGroup(
Args, Features, options::OPT_m_amdgpu_Features_Group);
}
/// AMDGPU Toolchain
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args),
OptionsDefault(
{{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
// Check the code object version options. Emit warnings for legacy options
// and an error for the last invalid code object version option.
// This is done here to avoid repeating the warnings or errors for
// each tool invocation.
checkAMDGPUCodeObjectVersion(D, Args);
}
Tool *AMDGPUToolChain::buildLinker() const {
return new tools::amdgpu::Linker(*this);
}
DerivedArgList *
AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
const OptTable &Opts = getDriver().getOpts();
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
for (Arg *A : Args) {
if (!shouldSkipArgument(A))
DAL->append(A);
}
checkTargetID(*DAL);
if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
return DAL;
// Phase 1 (.cl -> .bc)
if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
? options::OPT_m64
: options::OPT_m32));
// Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
// as they are defined that way in Options.td.
if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
options::OPT_Ofast))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
getOptionDefault(options::OPT_O));
}
return DAL;
}
bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
llvm::AMDGPU::GPUKind Kind) {
// Assume nothing without a specific target.
if (Kind == llvm::AMDGPU::GK_NONE)
return false;
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
// Enable f32 denormals by default on subtargets where fma is fast with
// denormals.
const bool BothDenormAndFMAFast =
(ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
(ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
return !BothDenormAndFMAFast;
}
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType) const {
// Denormals should always be enabled for f16 and f64.
if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
return llvm::DenormalMode::getIEEE();
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
options::OPT_fno_gpu_flush_denormals_to_zero,
getDefaultDenormsAreZeroForTarget(Kind)))
return llvm::DenormalMode::getPreserveSign();
return llvm::DenormalMode::getIEEE();
}
const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
// Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
// also implicitly treated as zero (DAZ).
return DAZ ? llvm::DenormalMode::getPreserveSign() :
llvm::DenormalMode::getIEEE();
}
bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind) {
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
return !HasWave32 || DriverArgs.hasFlag(
options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
}
/// ROCM Toolchain
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: AMDGPUToolChain(D, Triple, Args) {
RocmInstallation.detectDeviceLibrary();
}
void AMDGPUToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
// Default to "hidden" visibility, as object level linking will not be
// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CC1Args.push_back("-fvisibility");
CC1Args.push_back("hidden");
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
}
StringRef
AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
return getProcessorFromTargetID(
getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
}
AMDGPUToolChain::ParsedTargetIDType
AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const {
StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
if (TargetID.empty())
return {None, None, None};
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
if (!OptionalGpuArch)
return {TargetID.str(), None, None};
return {TargetID.str(), OptionalGpuArch.getValue().str(), FeatureMap};
}
void AMDGPUToolChain::checkTargetID(
const llvm::opt::ArgList &DriverArgs) const {
auto PTID = getParsedTargetID(DriverArgs);
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< PTID.OptionalTargetID.getValue();
}
}
llvm::Error
AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
SmallVector<std::string, 1> &GPUArchs) const {
std::string Program;
if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
Program = A->getValue();
else
Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
llvm::SmallString<64> OutputFile;
llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
OutputFile);
llvm::FileRemover OutputRemover(OutputFile.c_str());
llvm::Optional<llvm::StringRef> Redirects[] = {
{""},
OutputFile.str(),
{""},
};
std::string ErrorMessage;
if (int Result = llvm::sys::ExecuteAndWait(
Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0,
/*MemoryLimit*/ 0, &ErrorMessage)) {
if (Result > 0) {
ErrorMessage = "Exited with error code " + std::to_string(Result);
} else if (Result == -1) {
ErrorMessage = "Execute failed: " + ErrorMessage;
} else {
ErrorMessage = "Crashed: " + ErrorMessage;
}
return llvm::createStringError(std::error_code(),
Program + ": " + ErrorMessage);
}
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
llvm::MemoryBuffer::getFile(OutputFile.c_str());
if (!OutputBuf) {
return llvm::createStringError(OutputBuf.getError(),
"Failed to read stdout of " + Program +
": " + OutputBuf.getError().message());
}
for (llvm::line_iterator LineIt(**OutputBuf); !LineIt.is_at_end(); ++LineIt) {
GPUArchs.push_back(LineIt->str());
}
return llvm::Error::success();
}
llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
std::string &GPUArch) const {
// Detect the AMD GPUs installed in the system.
SmallVector<std::string, 1> GPUArchs;
auto Err = detectSystemGPUs(Args, GPUArchs);
if (Err) {
return Err;
}
if (GPUArchs.empty()) {
return llvm::createStringError(std::error_code(),
"No AMD GPU detected in the system");
}
GPUArch = GPUArchs[0];
if (GPUArchs.size() > 1) {
bool AllSame = std::all_of(
GPUArchs.begin(), GPUArchs.end(),
[&](const StringRef &GPUArch) { return GPUArch == GPUArchs.front(); });
if (!AllSame)
return llvm::createStringError(
std::error_code(), "Multiple AMD GPUs found with different archs");
}
return llvm::Error::success();
}
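For reference (example output only), the amdgpu_arch tool, or a user-supplied replacement given via the amdgpu-arch-tool option, is expected to print one architecture per line on stdout, e.g.

  gfx906
  gfx906

getSystemGPUArch then uses the first line and diagnoses the case where the detected GPUs disagree.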
void ROCMToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
DeviceOffloadingKind);
// For the OpenCL case where there is no offload target, accept -nostdlib to
// disable bitcode linking.
if (DeviceOffloadingKind == Action::OFK_None &&
DriverArgs.hasArg(options::OPT_nostdlib))
return;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return;
if (!RocmInstallation.hasDeviceLibrary()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return;
}
// Get the device name and canonicalize it
const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
if (LibDeviceFile.empty()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
return;
}
bool Wave64 = isWave64(DriverArgs, Kind);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
getDefaultDenormsAreZeroForTarget(Kind);
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
bool UnsafeMathOpt =
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
bool CorrectSqrt =
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
// Add the OpenCL specific bitcode library.
llvm::SmallVector<std::string, 12> BCLibs;
BCLibs.push_back(RocmInstallation.getOpenCLPath().str());
// Add the generic set of libraries.
BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt));
llvm::for_each(BCLibs, [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
});
}
llvm::SmallVector<std::string, 12>
RocmInstallationDetector::getCommonBitcodeLibs(
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
bool CorrectSqrt) const {
llvm::SmallVector<std::string, 12> BCLibs;
auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(BCFile.str()); };
AddBCLib(getOCMLPath());
AddBCLib(getOCKLPath());
AddBCLib(getDenormalsAreZeroPath(DAZ));
AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
AddBCLib(getWavefrontSize64Path(Wave64));
AddBCLib(LibDeviceFile);
return BCLibs;
}
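In a typical ROCm device-library layout (example only; exact file names depend on the installation), these getters resolve to bitcode files such as

  ocml.bc, ockl.bc,
  oclc_daz_opt_{on,off}.bc, oclc_unsafe_math_{on,off}.bc,
  oclc_finite_only_{on,off}.bc, oclc_correctly_rounded_sqrt_{on,off}.bc,
  oclc_wavefrontsize64_{on,off}.bc

plus the per-GPU library passed in as LibDeviceFile.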
bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
Option O = A->getOption();
if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
return true;
return false;
}
+
+llvm::SmallVector<std::string, 12>
+ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const {
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+
+ std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+ if (LibDeviceFile.empty()) {
+ getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
+ return {};
+ }
+
+ // If --hip-device-lib is not set, add the default bitcode libraries.
+ // TODO: There are way too many flags that change this. Do we need to check
+ // them all?
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
+ getDefaultDenormsAreZeroForTarget(Kind));
+ bool FiniteOnly = DriverArgs.hasFlag(
+ options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
+ bool UnsafeMathOpt =
+ DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations, false);
+ bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
+ options::OPT_fno_fast_math, false);
+ bool CorrectSqrt = DriverArgs.hasFlag(
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
+ bool Wave64 = isWave64(DriverArgs, Kind);
+
+ return RocmInstallation.getCommonBitcodeLibs(
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
+ FastRelaxedMath, CorrectSqrt);
+}
\ No newline at end of file
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
index 50ed3b3ded9a..a4bcf315ca76 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -1,145 +1,150 @@
//===--- AMDGPU.h - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
#include "Gnu.h"
#include "ROCm.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/TargetParser.h"
#include <map>
namespace clang {
namespace driver {
namespace tools {
namespace amdgpu {
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
Linker(const ToolChain &TC) : Tool("amdgpu::Linker", "ld.lld", TC) {}
bool isLinkJob() const override { return true; }
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
void getAMDGPUTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features);
} // end namespace amdgpu
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
protected:
const std::map<options::ID, const StringRef> OptionsDefault;
Tool *buildLinker() const override;
const StringRef getOptionDefault(options::ID OptID) const {
auto opt = OptionsDefault.find(OptID);
assert(opt != OptionsDefault.end() && "No Default for Option");
return opt->second;
}
public:
AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
unsigned GetDefaultDwarfVersion() const override { return 4; }
bool IsIntegratedAssemblerDefault() const override { return true; }
bool IsMathErrnoDefault() const override { return false; }
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
bool isPIEDefault() const override { return false; }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
/// Return whether denormals should be flushed, and treated as 0 by default
/// for the subtarget.
static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
llvm::DenormalMode getDefaultDenormalModeForType(
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;
static bool isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind);
/// Needed for using lto.
bool HasNativeLLVMSupport() const override {
return true;
}
/// Needed for translating LTO options.
const char *getDefaultLinker() const override { return "ld.lld"; }
/// Should skip argument.
bool shouldSkipArgument(const llvm::opt::Arg *Arg) const;
/// Uses the amdgpu_arch tool to get the arch of the system GPU. Returns an
/// error if unable to find one.
llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
std::string &GPUArch) const;
protected:
/// Check and diagnose invalid target ID specified by -mcpu.
virtual void checkTargetID(const llvm::opt::ArgList &DriverArgs) const;
/// The struct type returned by getParsedTargetID.
struct ParsedTargetIDType {
Optional<std::string> OptionalTargetID;
Optional<std::string> OptionalGPUArch;
Optional<llvm::StringMap<bool>> OptionalFeatures;
};
/// Get target ID, GPU arch, and target ID features if the target ID is
/// specified and valid.
ParsedTargetIDType
getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const;
/// Get GPU arch from -mcpu without checking.
StringRef getGPUArch(const llvm::opt::ArgList &DriverArgs) const;
llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
SmallVector<std::string, 1> &GPUArchs) const;
};
class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
public:
ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
void
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
+
+ // Returns a list of device library names shared by different languages
+ llvm::SmallVector<std::string, 12>
+ getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index fe1d19c2dd67..135e3694434d 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -1,304 +1,326 @@
//===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUOpenMP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
namespace {
static const char *getOutputFileName(Compilation &C, StringRef Base,
const char *Postfix,
const char *Extension) {
const char *OutputFileName;
if (C.getDriver().isSaveTempsEnabled()) {
OutputFileName =
C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
} else {
std::string TmpName =
C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
return OutputFileName;
}
static void addLLCOptArg(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
StringRef OOpt = "0";
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
else if (A->getOption().matches(options::OPT_O)) {
// Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
// so we map -Os/-Oz to -O2.
// Only clang supports -Og, and maps it to -O1.
// We map anything else to -O2.
OOpt = llvm::StringSwitch<const char *>(A->getValue())
.Case("1", "1")
.Case("2", "2")
.Case("3", "3")
.Case("s", "2")
.Case("z", "2")
.Case("g", "1")
.Default("0");
}
CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
}
}
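For reference, the resulting mapping from the host optimization level to the llc invocation is (illustrative):

  -O4 / -Ofast        ->  llc -O3
  -O1 / -O2 / -O3     ->  llc -O1 / -O2 / -O3
  -Os / -Oz           ->  llc -O2
  -Og                 ->  llc -O1
  -O0 / anything else ->  llc -O0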
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
std::string &GPUArch) {
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
std::string ErrMsg =
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
return false;
}
return true;
}
} // namespace
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
- Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
- const ArgList &Args, StringRef SubArchName,
- StringRef OutputFilePrefix) const {
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
+ StringRef SubArchName, StringRef OutputFilePrefix) const {
ArgStringList CmdArgs;
for (const auto &II : Inputs)
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
+
+ if (Args.hasArg(options::OPT_l)) {
+ auto Lm = Args.getAllArgValues(options::OPT_l);
+ bool HasLibm = false;
+ for (auto &Lib : Lm) {
+ if (Lib == "m") {
+ HasLibm = true;
+ break;
+ }
+ }
+
+ if (HasLibm) {
+ SmallVector<std::string, 12> BCLibs =
+ AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
+ llvm::for_each(BCLibs, [&](StringRef BCFile) {
+ CmdArgs.push_back(Args.MakeArgString(BCFile));
+ });
+ }
+ }
+
// Add an intermediate output file.
CmdArgs.push_back("-o");
const char *OutputFileName =
getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
CmdArgs.push_back(OutputFileName);
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(OutputFileName))));
return OutputFileName;
}
const char *AMDGCN::OpenMPLinker::constructLlcCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix, const char *InputFileName,
bool OutputIsAsm) const {
// Construct llc command.
ArgStringList LlcArgs;
// The input to llc is the output from llvm-link.
LlcArgs.push_back(InputFileName);
// Pass optimization arg to llc.
addLLCOptArg(Args, LlcArgs);
LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
LlcArgs.push_back(
Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
LlcArgs.push_back(A->getValue(0));
}
// Add output filename
LlcArgs.push_back("-o");
const char *LlcOutputFile =
getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
LlcArgs.push_back(LlcOutputFile);
const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
return LlcOutputFile;
}
void AMDGCN::OpenMPLinker::constructLldCommand(
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
const InputInfo &Output, const llvm::opt::ArgList &Args,
const char *InputFileName) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
"-shared", "-o", Output.getFilename(),
InputFileName};
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
}
// For amdgcn, the inputs of the linker job are device bitcode and the output
// is an object file. It runs the llvm-link, llc, then lld steps.
void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const ToolChain &TC = getToolChain();
assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
if (GPUArch.empty()) {
if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
return;
}
// Prefix for temporary file name.
std::string Prefix;
for (const auto &II : Inputs)
if (II.isFilename())
Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
assert(Prefix.length() && "no linker inputs are files ");
// Each command outputs different files.
- const char *LLVMLinkCommand =
- constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
+ const char *LLVMLinkCommand = constructLLVMLinkCommand(
+ AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
// Produce readable assembly if save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled())
constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
/*OutputIsAsm=*/true);
const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
Prefix, LLVMLinkCommand);
constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
}
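For a single device input and a gfx906 offload arch (sketch only; the temporary file names and the arch are illustrative), the constructed job sequence is roughly:

  llvm-link foo-gfx906.bc [common device libs if -lm was passed] -o foo-gfx906-linked.bc
  llc foo-gfx906-linked.bc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -o foo-gfx906.o
  lld -flavor gnu --no-undefined -shared -o <output> foo-gfx906.o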
AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
const llvm::Triple &Triple,
const ToolChain &HostTC,
const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
// Look up binaries in the driver directory; this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
}
void AMDGPUOpenMPToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
if (GPUArch.empty()) {
if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
return;
}
assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
"Only OpenMP offloading kinds are supported.");
CC1Args.push_back("-target-cpu");
CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasArg(options::OPT_nogpulib))
return;
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime, false))
BitcodeSuffix = "new-amdgcn-" + GPUArch;
else
BitcodeSuffix = "amdgcn-" + GPUArch;
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
}
llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
if (DeviceOffloadKind != Action::OFK_OpenMP) {
for (Arg *A : Args) {
DAL->append(A);
}
}
if (!BoundArch.empty()) {
DAL->eraseArg(options::OPT_march_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
BoundArch);
}
return DAL;
}
Tool *AMDGPUOpenMPToolChain::buildLinker() const {
assert(getTriple().isAMDGCN());
return new tools::AMDGCN::OpenMPLinker(*this);
}
void AMDGPUOpenMPToolChain::addClangWarningOptions(
ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
ToolChain::CXXStdlibType
AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
// The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
// allows sanitizer arguments on the command line if they are supported by the
// host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
// line arguments for any of these "supported" sanitizers. That means that no
// sanitization of device code is actually supported at this time.
//
// This behavior is necessary because the host and device toolchains
// invocations often share the command line, so the device toolchain must
// tolerate flags meant only for the host toolchain.
return HostTC.getSupportedSanitizers();
}
VersionTuple
AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
return HostTC.computeMSVCVersion(D, Args);
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index effca7e212cc..233256bf7378 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -1,106 +1,110 @@
//===- AMDGPUOpenMP.h - AMDGPUOpenMP ToolChain Implementation -*- C++ -*---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
#include "AMDGPU.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
+namespace toolchains {
+class AMDGPUOpenMPToolChain;
+}
+
namespace tools {
namespace AMDGCN {
// Runs llvm-link/llc/lld, which link multiple LLVM bitcode files together with
// the device library, then compile the result to ISA in a shared object.
class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
public:
OpenMPLinker(const ToolChain &TC)
: Tool("AMDGCN::OpenMPLinker", "amdgcn-link", TC) {}
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
private:
/// \return llvm-link output file name.
- const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
- const InputInfoList &Inputs,
- const llvm::opt::ArgList &Args,
- llvm::StringRef SubArchName,
- llvm::StringRef OutputFilePrefix) const;
+ const char *constructLLVMLinkCommand(
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
+ llvm::StringRef OutputFilePrefix) const;
/// \return llc output file name.
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::StringRef SubArchName,
llvm::StringRef OutputFilePrefix,
const char *InputFileName,
bool OutputIsAsm = false) const;
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args,
const char *InputFileName) const;
};
} // end namespace AMDGCN
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final
: public ROCMToolChain {
public:
AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC,
const llvm::opt::ArgList &Args);
const llvm::Triple *getAuxTriple() const override {
return &HostTC.getTriple();
}
llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;
void
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
SanitizerMask getSupportedSanitizers() const override;
VersionTuple
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
const ToolChain &HostTC;
protected:
Tool *buildLinker() const override;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPUOPENMP_H
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
index 4a7dc3a33a5f..cb38ab51327c 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1,7810 +1,7811 @@
//===-- Clang.cpp - Clang+LLVM ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Clang.h"
#include "AMDGPU.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/RISCV.h"
#include "Arch/Sparc.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "CommonArgs.h"
#include "Hexagon.h"
#include "MSP430.h"
#include "PS4CPU.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Driver/Distro.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/YAMLParser.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
if (Arg *A =
Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC)) {
if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) &&
!Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) {
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< A->getBaseArg().getAsString(Args)
<< (D.IsCLMode() ? "/E, /P or /EP" : "-E");
}
}
}
static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
// In gcc, only ARM checks this, but it seems reasonable to check universally.
if (Args.hasArg(options::OPT_static))
if (const Arg *A =
Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
<< "-static";
}
// Add backslashes to escape spaces and other backslashes.
// This is used for the space-separated argument list specified with
// the -dwarf-debug-flags option.
static void EscapeSpacesAndBackslashes(const char *Arg,
SmallVectorImpl<char> &Res) {
for (; *Arg; ++Arg) {
switch (*Arg) {
default:
break;
case ' ':
case '\\':
Res.push_back('\\');
break;
}
Res.push_back(*Arg);
}
}
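As a worked example (illustrative only), an argument recorded for -dwarf-debug-flags such as

  -DMSG=hello world\n

is escaped to

  -DMSG=hello\ world\\n

i.e. every space and backslash gains a preceding backslash.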
// Quote target names for inclusion in GNU Make dependency files.
// Only the characters '$', '#', ' ', '\t' are quoted.
static void QuoteTarget(StringRef Target, SmallVectorImpl<char> &Res) {
for (unsigned i = 0, e = Target.size(); i != e; ++i) {
switch (Target[i]) {
case ' ':
case '\t':
// Escape the preceding backslashes
for (int j = i - 1; j >= 0 && Target[j] == '\\'; --j)
Res.push_back('\\');
// Escape the space/tab
Res.push_back('\\');
break;
case '$':
Res.push_back('$');
break;
case '#':
Res.push_back('\\');
break;
default:
break;
}
Res.push_back(Target[i]);
}
}
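As a worked example (illustrative only), a dependency target named

  my file$1#x

is quoted as

  my\ file$$1\#x

spaces and tabs get a backslash (after re-escaping any backslashes immediately before them), '$' is doubled, and '#' is backslash-escaped.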
/// Apply \a Work on the current tool chain \a RegularToolChain and any other
/// offloading tool chain that is associated with the current action \a JA.
static void
forAllAssociatedToolChains(Compilation &C, const JobAction &JA,
const ToolChain &RegularToolChain,
llvm::function_ref<void(const ToolChain &)> Work) {
// Apply Work on the current/regular tool chain.
Work(RegularToolChain);
// Apply Work on all the offloading tool chains associated with the current
// action.
if (JA.isHostOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>());
else if (JA.isDeviceOffloading(Action::OFK_Cuda))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
else if (JA.isHostOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_HIP>());
else if (JA.isDeviceOffloading(Action::OFK_HIP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
Work(*II->second);
} else if (JA.isDeviceOffloading(Action::OFK_OpenMP))
Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
//
// TODO: Add support for other offloading programming models here.
//
}
/// This is a helper function for validating the optional refinement step
/// parameter in reciprocal argument strings. Return false if there is an error
/// parsing the refinement step. Otherwise, return true and set the Position
/// of the refinement step in the input string.
static bool getRefinementStep(StringRef In, const Driver &D,
const Arg &A, size_t &Position) {
const char RefinementStepToken = ':';
Position = In.find(RefinementStepToken);
if (Position != StringRef::npos) {
StringRef Option = A.getOption().getName();
StringRef RefStep = In.substr(Position + 1);
// Allow exactly one numeric character for the additional refinement
// step parameter. This is reasonable for all currently-supported
// operations and architectures because we would expect that a larger value
// of refinement steps would cause the estimate "optimization" to
// under-perform the native operation. Also, if the estimate does not
// converge quickly, it probably will not ever converge, so further
// refinement steps will not produce a better answer.
if (RefStep.size() != 1) {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
char RefStepChar = RefStep[0];
if (RefStepChar < '0' || RefStepChar > '9') {
D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
return false;
}
}
return true;
}
/// The -mrecip flag requires processing of many optional parameters.
static void ParseMRecip(const Driver &D, const ArgList &Args,
ArgStringList &OutStrings) {
StringRef DisabledPrefixIn = "!";
StringRef DisabledPrefixOut = "!";
StringRef EnabledPrefixOut = "";
StringRef Out = "-mrecip=";
Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ);
if (!A)
return;
unsigned NumOptions = A->getNumValues();
if (NumOptions == 0) {
// No option is the same as "all".
OutStrings.push_back(Args.MakeArgString(Out + "all"));
return;
}
// Pass through "all", "none", or "default" with an optional refinement step.
if (NumOptions == 1) {
StringRef Val = A->getValue(0);
size_t RefStepLoc;
if (!getRefinementStep(Val, D, *A, RefStepLoc))
return;
StringRef ValBase = Val.slice(0, RefStepLoc);
if (ValBase == "all" || ValBase == "none" || ValBase == "default") {
OutStrings.push_back(Args.MakeArgString(Out + Val));
return;
}
}
// Each reciprocal type may be enabled or disabled individually.
// Check each input value for validity, concatenate them all back together,
// and pass through.
llvm::StringMap<bool> OptionStrings;
OptionStrings.insert(std::make_pair("divd", false));
OptionStrings.insert(std::make_pair("divf", false));
OptionStrings.insert(std::make_pair("vec-divd", false));
OptionStrings.insert(std::make_pair("vec-divf", false));
OptionStrings.insert(std::make_pair("sqrtd", false));
OptionStrings.insert(std::make_pair("sqrtf", false));
OptionStrings.insert(std::make_pair("vec-sqrtd", false));
OptionStrings.insert(std::make_pair("vec-sqrtf", false));
for (unsigned i = 0; i != NumOptions; ++i) {
StringRef Val = A->getValue(i);
bool IsDisabled = Val.startswith(DisabledPrefixIn);
// Ignore the disablement token for string matching.
if (IsDisabled)
Val = Val.substr(1);
size_t RefStep;
if (!getRefinementStep(Val, D, *A, RefStep))
return;
StringRef ValBase = Val.slice(0, RefStep);
llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
if (OptionIter == OptionStrings.end()) {
// Try again specifying float suffix.
OptionIter = OptionStrings.find(ValBase.str() + 'f');
if (OptionIter == OptionStrings.end()) {
// The input name did not match any known option string.
D.Diag(diag::err_drv_unknown_argument) << Val;
return;
}
// The option was specified without a float or double suffix.
// Make sure that the double entry was not already specified.
// The float entry will be checked below.
if (OptionStrings[ValBase.str() + 'd']) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
}
if (OptionIter->second == true) {
// Duplicate option specified.
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
return;
}
// Mark the matched option as found. Do not allow duplicate specifiers.
OptionIter->second = true;
// If the precision was not specified, also mark the double entry as found.
if (ValBase.back() != 'f' && ValBase.back() != 'd')
OptionStrings[ValBase.str() + 'd'] = true;
// Build the output string.
StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
Out = Args.MakeArgString(Out + Prefix + Val);
if (i != NumOptions - 1)
Out = Args.MakeArgString(Out + ",");
}
OutStrings.push_back(Args.MakeArgString(Out));
}
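A few accepted spellings under this grammar (examples only): bare -mrecip is forwarded as -mrecip=all; -mrecip=all:1 passes a single refinement step for everything; -mrecip=vec-sqrtf,!divd:0 enables the vector float square-root estimate while explicitly disabling the scalar double divide estimate with zero refinement steps.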
/// The -mprefer-vector-width option accepts either a positive integer
/// or the string "none".
static void ParseMPreferVectorWidth(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
Arg *A = Args.getLastArg(options::OPT_mprefer_vector_width_EQ);
if (!A)
return;
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("-mprefer-vector-width=none");
} else {
unsigned Width;
if (Value.getAsInteger(10, Width)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
CmdArgs.push_back(Args.MakeArgString("-mprefer-vector-width=" + Value));
}
}
static void getWebAssemblyTargetFeatures(const ArgList &Args,
std::vector<StringRef> &Features) {
handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group);
}
static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs,
bool ForAS, bool IsAux = false) {
std::vector<StringRef> Features;
switch (Triple.getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
mips::getMIPSTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
arm::getARMTargetFeatures(D, Triple, Args, CmdArgs, Features, ForAS);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
ppc::getPPCTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
riscv::getRISCVTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::systemz:
systemz::getSystemZTargetFeatures(D, Args, Features);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
x86::getX86TargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::hexagon:
hexagon::getHexagonTargetFeatures(D, Args, Features);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
getWebAssemblyTargetFeatures(Args, Features);
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
sparc::getSparcTargetFeatures(D, Args, Features);
break;
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::m68k:
m68k::getM68kTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::msp430:
msp430::getMSP430TargetFeatures(D, Args, Features);
break;
case llvm::Triple::ve:
ve::getVETargetFeatures(D, Args, Features);
break;
}
for (auto Feature : unifyTargetFeatures(Features)) {
CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
CmdArgs.push_back(Feature.data());
}
}
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
// We use the zero-cost exception tables for Objective-C if the non-fragile
// ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and
// later.
if (runtime.isNonFragile())
return true;
if (!Triple.isMacOSX())
return false;
return (!Triple.isMacOSXVersionLT(10, 5) &&
(Triple.getArch() == llvm::Triple::x86_64 ||
Triple.getArch() == llvm::Triple::arm));
}
/// Adds exception related arguments to the driver command arguments. There's a
/// master flag, -fexceptions, and also language-specific flags to enable/disable
/// C++ and Objective-C exceptions. This makes it possible to, for example,
/// disable C++ exceptions but enable Objective-C exceptions.
static bool addExceptionArgs(const ArgList &Args, types::ID InputType,
const ToolChain &TC, bool KernelOrKext,
const ObjCRuntime &objcRuntime,
ArgStringList &CmdArgs) {
const llvm::Triple &Triple = TC.getTriple();
if (KernelOrKext) {
// -mkernel and -fapple-kext imply no exceptions, so claim exception related
// arguments now to avoid warnings about unused arguments.
Args.ClaimAllArgs(options::OPT_fexceptions);
Args.ClaimAllArgs(options::OPT_fno_exceptions);
Args.ClaimAllArgs(options::OPT_fobjc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_exceptions);
Args.ClaimAllArgs(options::OPT_fcxx_exceptions);
Args.ClaimAllArgs(options::OPT_fno_cxx_exceptions);
Args.ClaimAllArgs(options::OPT_fasync_exceptions);
Args.ClaimAllArgs(options::OPT_fno_async_exceptions);
return false;
}
// See if the user explicitly enabled exceptions.
bool EH = Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions,
false);
bool EHa = Args.hasFlag(options::OPT_fasync_exceptions,
options::OPT_fno_async_exceptions, false);
if (EHa) {
CmdArgs.push_back("-fasync-exceptions");
EH = true;
}
// Obj-C exceptions are enabled by default, regardless of -fexceptions. This
// is not necessarily sensible, but follows GCC.
if (types::isObjC(InputType) &&
Args.hasFlag(options::OPT_fobjc_exceptions,
options::OPT_fno_objc_exceptions, true)) {
CmdArgs.push_back("-fobjc-exceptions");
EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple);
}
if (types::isCXX(InputType)) {
// Disable C++ EH by default on XCore and PS4.
bool CXXExceptionsEnabled =
Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU();
Arg *ExceptionArg = Args.getLastArg(
options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions,
options::OPT_fexceptions, options::OPT_fno_exceptions);
if (ExceptionArg)
CXXExceptionsEnabled =
ExceptionArg->getOption().matches(options::OPT_fcxx_exceptions) ||
ExceptionArg->getOption().matches(options::OPT_fexceptions);
if (CXXExceptionsEnabled) {
CmdArgs.push_back("-fcxx-exceptions");
EH = true;
}
}
// OPT_fignore_exceptions means exception could still be thrown,
// but no clean up or catch would happen in current module.
// So we do not set EH to false.
Args.AddLastArg(CmdArgs, options::OPT_fignore_exceptions);
if (EH)
CmdArgs.push_back("-fexceptions");
return EH;
}
static bool ShouldEnableAutolink(const ArgList &Args, const ToolChain &TC,
const JobAction &JA) {
bool Default = true;
if (TC.getTriple().isOSDarwin()) {
// The native darwin assembler doesn't support the linker_option directives,
// so we disable them if we think the .s file will be passed to it.
Default = TC.useIntegratedAs();
}
// The linker_option directives are intended for host compilation.
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
Default = false;
return Args.hasFlag(options::OPT_fautolink, options::OPT_fno_autolink,
Default);
}
// Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases
// to the corresponding DebugInfoKind.
static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
assert(A.getOption().matches(options::OPT_gN_Group) &&
"Not a -g option that specifies a debug-info level");
if (A.getOption().matches(options::OPT_g0) ||
A.getOption().matches(options::OPT_ggdb0))
return codegenoptions::NoDebugInfo;
if (A.getOption().matches(options::OPT_gline_tables_only) ||
A.getOption().matches(options::OPT_ggdb1))
return codegenoptions::DebugLineTablesOnly;
if (A.getOption().matches(options::OPT_gline_directives_only))
return codegenoptions::DebugDirectivesOnly;
return codegenoptions::DebugInfoConstructor;
}
static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
switch (Triple.getArch()){
default:
return false;
case llvm::Triple::arm:
case llvm::Triple::thumb:
// ARM Darwin targets require a frame pointer to be always present to aid
// offline debugging via backtraces.
return Triple.isOSDarwin();
}
}
static bool useFramePointerForTargetByDefault(const ArgList &Args,
const llvm::Triple &Triple) {
if (Args.hasArg(options::OPT_pg) && !Args.hasArg(options::OPT_mfentry))
return true;
switch (Triple.getArch()) {
case llvm::Triple::xcore:
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
case llvm::Triple::msp430:
// XCore never wants frame pointers, regardless of OS.
// WebAssembly never wants frame pointers.
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::amdgcn:
case llvm::Triple::r600:
return !areOptimizationsEnabled(Args);
default:
break;
}
if (Triple.isOSNetBSD()) {
return !areOptimizationsEnabled(Args);
}
if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI ||
Triple.isOSHurd()) {
switch (Triple.getArch()) {
// Don't use a frame pointer on linux if optimizing for certain targets.
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isAndroid())
return true;
LLVM_FALLTHROUGH;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::systemz:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return !areOptimizationsEnabled(Args);
default:
return true;
}
}
if (Triple.isOSWindows()) {
switch (Triple.getArch()) {
case llvm::Triple::x86:
return !areOptimizationsEnabled(Args);
case llvm::Triple::x86_64:
return Triple.isOSBinFormatMachO();
case llvm::Triple::arm:
case llvm::Triple::thumb:
// Windows on ARM builds with FPO disabled to aid fast stack walking
return true;
default:
// All other supported Windows ISAs use xdata unwind information, so frame
// pointers are not generally useful.
return false;
}
}
return true;
}
static CodeGenOptions::FramePointerKind
getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) {
// We have 4 states:
//
// 00) leaf retained, non-leaf retained
// 01) leaf retained, non-leaf omitted (this is invalid)
// 10) leaf omitted, non-leaf retained
// (what -momit-leaf-frame-pointer was designed for)
// 11) leaf omitted, non-leaf omitted
//
// "omit" options taking precedence over "no-omit" options is the only way
// to make 3 valid states representable
Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
options::OPT_fno_omit_frame_pointer);
bool OmitFP = A && A->getOption().matches(options::OPT_fomit_frame_pointer);
bool NoOmitFP =
A && A->getOption().matches(options::OPT_fno_omit_frame_pointer);
bool OmitLeafFP = Args.hasFlag(options::OPT_momit_leaf_frame_pointer,
options::OPT_mno_omit_leaf_frame_pointer,
Triple.isAArch64() || Triple.isPS4CPU());
if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) ||
(!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) {
if (OmitLeafFP)
return CodeGenOptions::FramePointerKind::NonLeaf;
return CodeGenOptions::FramePointerKind::All;
}
return CodeGenOptions::FramePointerKind::None;
}
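Two worked cases (illustrative): -fno-omit-frame-pointer -momit-leaf-frame-pointer yields FramePointerKind::NonLeaf, while -fomit-frame-pointer on a target that does not force frame pointers yields FramePointerKind::None; with no flags at all, the target-default logic above decides between All/NonLeaf and None.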
/// Add a CC1 option to specify the debug compilation directory.
static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::vfs::FileSystem &VFS) {
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fdebug-compilation-dir=") +
A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
VFS.getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD));
}
}
/// Add a CC1 and CC1AS option to specify the debug file path prefix map.
static void addDebugPrefixMapArg(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map));
A->claim();
}
}
/// Add a CC1 and CC1AS option to specify the macro file path prefix map.
static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fmacro_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fmacro-prefix-map=" + Map));
A->claim();
}
}
/// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fcoverage_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (Map.find('=') == StringRef::npos)
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
CmdArgs.push_back(Args.MakeArgString("-fcoverage-prefix-map=" + Map));
A->claim();
}
}
/// Vectorize at all optimization levels greater than 1 except for -Oz.
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
/// enabled.
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;
if (A->getOption().matches(options::OPT_O0))
return false;
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;
// Don't vectorize -Oz, unless it's the slp vectorizer.
if (S == "z")
return isSlpVec;
unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;
return OptLevel > 1;
}
return false;
}
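For example (derived from the check above): -O2, -O3, -O4, -Ofast and -Os enable both the loop and SLP vectorizers by default, -Oz enables only the SLP vectorizer, and -O0, -O1 and -Og enable neither.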
/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
// When using -verify-pch, we don't want to provide the type
// 'precompiled-header' if it was inferred from the file extension
if (Args.hasArg(options::OPT_verify_pch) && Input.getType() == types::TY_PCH)
return;
CmdArgs.push_back("-x");
if (Args.hasArg(options::OPT_rewrite_objc))
CmdArgs.push_back(types::getTypeName(types::TY_PP_ObjCXX));
else {
// Map the driver type to the frontend type. This is mostly an identity
// mapping, except that the distinction between module interface units
// and other source files does not exist at the frontend layer.
const char *ClangType;
switch (Input.getType()) {
case types::TY_CXXModule:
ClangType = "c++";
break;
case types::TY_PP_CXXModule:
ClangType = "c++-cpp-output";
break;
default:
ClangType = types::getTypeName(Input.getType());
break;
}
CmdArgs.push_back(ClangType);
}
}
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
const Driver &D, const InputInfo &Output,
const ArgList &Args,
ArgStringList &CmdArgs) {
auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
options::OPT_fprofile_generate_EQ,
options::OPT_fno_profile_generate);
if (PGOGenerateArg &&
PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
PGOGenerateArg = nullptr;
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileGenerateArg = Args.getLastArg(
options::OPT_fprofile_instr_generate,
options::OPT_fprofile_instr_generate_EQ,
options::OPT_fno_profile_instr_generate);
if (ProfileGenerateArg &&
ProfileGenerateArg->getOption().matches(
options::OPT_fno_profile_instr_generate))
ProfileGenerateArg = nullptr;
if (PGOGenerateArg && ProfileGenerateArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling();
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (PGOGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling();
if (ProfileGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling();
if (CSPGOGenerateArg && PGOGenerateArg) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< CSPGOGenerateArg->getSpelling() << PGOGenerateArg->getSpelling();
PGOGenerateArg = nullptr;
}
if (TC.getTriple().isOSAIX()) {
if (PGOGenerateArg)
if (!D.isUsingLTO(false /*IsDeviceOffloadAction */) ||
D.getLTOMode() != LTOK_Full)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< PGOGenerateArg->getSpelling() << "-flto";
if (ProfileGenerateArg)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileGenerateArg->getSpelling() << TC.getTriple().str();
if (Arg *ProfileSampleUseArg = getLastProfileSampleUseArg(Args))
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileSampleUseArg->getSpelling() << TC.getTriple().str();
}
if (ProfileGenerateArg) {
if (ProfileGenerateArg->getOption().matches(
options::OPT_fprofile_instr_generate_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") +
ProfileGenerateArg->getValue()));
// The default is to use Clang Instrumentation.
CmdArgs.push_back("-fprofile-instrument=clang");
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
}
Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
PGOGenArg = PGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=llvm");
}
if (CSPGOGenerateArg) {
assert(!PGOGenerateArg);
PGOGenArg = CSPGOGenerateArg;
CmdArgs.push_back("-fprofile-instrument=csllvm");
}
if (PGOGenArg) {
if (TC.getTriple().isWindowsMSVCEnvironment()) {
// Add dependent lib for clang_rt.profile
CmdArgs.push_back(Args.MakeArgString(
"--dependent-lib=" + TC.getCompilerRTBasename(Args, "profile")));
}
if (PGOGenArg->getOption().matches(
PGOGenerateArg ? options::OPT_fprofile_generate_EQ
: options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(PGOGenArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path));
}
}
if (ProfileUseArg) {
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
options::OPT_fprofile_instr_use))) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
}
}
bool EmitCovNotes = Args.hasFlag(options::OPT_ftest_coverage,
options::OPT_fno_test_coverage, false) ||
Args.hasArg(options::OPT_coverage);
bool EmitCovData = TC.needsGCovInstrumentation(Args);
if (EmitCovNotes)
CmdArgs.push_back("-ftest-coverage");
if (EmitCovData)
CmdArgs.push_back("-fprofile-arcs");
if (Args.hasFlag(options::OPT_fcoverage_mapping,
options::OPT_fno_coverage_mapping, false)) {
if (!ProfileGenerateArg)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fcoverage-mapping"
<< "-fprofile-instr-generate";
CmdArgs.push_back("-fcoverage-mapping");
}
if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fcoverage_compilation_dir_EQ)) {
if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("-fcoverage-compilation-dir=") + A->getValue()));
else
A->render(Args, CmdArgs);
} else if (llvm::ErrorOr<std::string> CWD =
D.getVFS().getCurrentWorkingDirectory()) {
CmdArgs.push_back(Args.MakeArgString("-fcoverage-compilation-dir=" + *CWD));
}
if (Args.hasArg(options::OPT_fprofile_exclude_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_exclude_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-exclude-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(
Args.MakeArgString(Twine("-fprofile-exclude-files=" + v)));
}
if (Args.hasArg(options::OPT_fprofile_filter_files_EQ)) {
auto *Arg = Args.getLastArg(options::OPT_fprofile_filter_files_EQ);
if (!Args.hasArg(options::OPT_coverage))
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fprofile-filter-files="
<< "--coverage";
StringRef v = Arg->getValue();
CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-filter-files=" + v)));
}
if (const auto *A = Args.getLastArg(options::OPT_fprofile_update_EQ)) {
StringRef Val = A->getValue();
if (Val == "atomic" || Val == "prefer-atomic")
CmdArgs.push_back("-fprofile-update=atomic");
else if (Val != "single")
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
} else if (TC.getSanitizerArgs().needsTsanRt()) {
CmdArgs.push_back("-fprofile-update=atomic");
}
// Leave -fprofile-dir= an unused argument unless .gcda emission is
// enabled. To be polite, with '-fprofile-arcs -fno-profile-arcs' consider
// the flag used. There is no -fno-profile-dir, so the user has no
// targeted way to suppress the warning.
Arg *FProfileDir = nullptr;
if (Args.hasArg(options::OPT_fprofile_arcs) ||
Args.hasArg(options::OPT_coverage))
FProfileDir = Args.getLastArg(options::OPT_fprofile_dir);
// Put the .gcno and .gcda files (if needed) next to the object file or
// bitcode file in the case of LTO.
// FIXME: There should be a simpler way to find the object file for this
// input, and this code probably does the wrong thing for commands that
// compile and link all at once.
if ((Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) &&
(EmitCovNotes || EmitCovData) && Output.isFilename()) {
SmallString<128> OutputFilename;
if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT__SLASH_Fo))
OutputFilename = FinalOutput->getValue();
else if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
OutputFilename = FinalOutput->getValue();
else
OutputFilename = llvm::sys::path::filename(Output.getBaseInput());
SmallString<128> CoverageFilename = OutputFilename;
if (llvm::sys::path::is_relative(CoverageFilename))
(void)D.getVFS().makeAbsolute(CoverageFilename);
llvm::sys::path::replace_extension(CoverageFilename, "gcno");
CmdArgs.push_back("-coverage-notes-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
if (EmitCovData) {
if (FProfileDir) {
CoverageFilename = FProfileDir->getValue();
llvm::sys::path::append(CoverageFilename, OutputFilename);
}
llvm::sys::path::replace_extension(CoverageFilename, "gcda");
CmdArgs.push_back("-coverage-data-file");
CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
}
}
}
/// Check whether the given input tree contains any compilation actions.
static bool ContainsCompileAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A))
return true;
for (const auto &AI : A->inputs())
if (ContainsCompileAction(AI))
return true;
return false;
}
/// Check if -relax-all should be passed to the internal assembler.
/// This is done by default when compiling non-assembler source with -O0.
static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
bool RelaxDefault = true;
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
RelaxDefault = A->getOption().matches(options::OPT_O0);
if (RelaxDefault) {
RelaxDefault = false;
for (const auto &Act : C.getActions()) {
if (ContainsCompileAction(Act)) {
RelaxDefault = true;
break;
}
}
}
return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
RelaxDefault);
}
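// For illustration: with -O0 (or no -O flag) and at least one compile or
// backend action, -mrelax-all is the default; at any other -O level the
// default is off. An explicit -mrelax-all / -mno-relax-all always wins.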
// Extract the integer N from a string spelled "-dwarf-N", returning 0
// on mismatch. The StringRef input (rather than an Arg) allows
// for use by the "-Xassembler" option parser.
static unsigned DwarfVersionNum(StringRef ArgValue) {
return llvm::StringSwitch<unsigned>(ArgValue)
.Case("-gdwarf-2", 2)
.Case("-gdwarf-3", 3)
.Case("-gdwarf-4", 4)
.Case("-gdwarf-5", 5)
.Default(0);
}
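// For illustration, given the cases above:
//   DwarfVersionNum("-gdwarf-4") == 4
//   DwarfVersionNum("-gdwarf-5") == 5
//   DwarfVersionNum("-g")        == 0   // not a "-gdwarf-N" spelling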
// Find a DWARF format version option.
// This function is complementary to DwarfVersionNum().
static const Arg *getDwarfNArg(const ArgList &Args) {
return Args.getLastArg(options::OPT_gdwarf_2, options::OPT_gdwarf_3,
options::OPT_gdwarf_4, options::OPT_gdwarf_5,
options::OPT_gdwarf);
}
static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind DebugInfoKind,
unsigned DwarfVersion,
llvm::DebuggerKind DebuggerTuning) {
switch (DebugInfoKind) {
case codegenoptions::DebugDirectivesOnly:
CmdArgs.push_back("-debug-info-kind=line-directives-only");
break;
case codegenoptions::DebugLineTablesOnly:
CmdArgs.push_back("-debug-info-kind=line-tables-only");
break;
case codegenoptions::DebugInfoConstructor:
CmdArgs.push_back("-debug-info-kind=constructor");
break;
case codegenoptions::LimitedDebugInfo:
CmdArgs.push_back("-debug-info-kind=limited");
break;
case codegenoptions::FullDebugInfo:
CmdArgs.push_back("-debug-info-kind=standalone");
break;
case codegenoptions::UnusedTypeInfo:
CmdArgs.push_back("-debug-info-kind=unused-types");
break;
default:
break;
}
if (DwarfVersion > 0)
CmdArgs.push_back(
Args.MakeArgString("-dwarf-version=" + Twine(DwarfVersion)));
switch (DebuggerTuning) {
case llvm::DebuggerKind::GDB:
CmdArgs.push_back("-debugger-tuning=gdb");
break;
case llvm::DebuggerKind::LLDB:
CmdArgs.push_back("-debugger-tuning=lldb");
break;
case llvm::DebuggerKind::SCE:
CmdArgs.push_back("-debugger-tuning=sce");
break;
case llvm::DebuggerKind::DBX:
CmdArgs.push_back("-debugger-tuning=dbx");
break;
default:
break;
}
}
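// For illustration: (DebugInfoConstructor, /*DwarfVersion=*/5,
// DebuggerKind::GDB) appends "-debug-info-kind=constructor",
// "-dwarf-version=5" and "-debugger-tuning=gdb" to CmdArgs; a DwarfVersion
// of 0 suppresses the -dwarf-version flag entirely.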
static bool checkDebugInfoOption(const Arg *A, const ArgList &Args,
const Driver &D, const ToolChain &TC) {
assert(A && "Expected non-nullptr argument.");
if (TC.supportsDebugInfoOption(A))
return true;
D.Diag(diag::warn_drv_unsupported_debug_info_opt_for_target)
<< A->getAsString(Args) << TC.getTripleString();
return false;
}
static void RenderDebugInfoCompressionArgs(const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D,
const ToolChain &TC) {
const Arg *A = Args.getLastArg(options::OPT_gz_EQ);
if (!A)
return;
if (checkDebugInfoOption(A, Args, D, TC)) {
StringRef Value = A->getValue();
if (Value == "none") {
CmdArgs.push_back("--compress-debug-sections=none");
} else if (Value == "zlib" || Value == "zlib-gnu") {
if (llvm::zlib::isAvailable()) {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::warn_debug_compression_unavailable);
}
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
static const char *RelocationModelName(llvm::Reloc::Model Model) {
switch (Model) {
case llvm::Reloc::Static:
return "static";
case llvm::Reloc::PIC_:
return "pic";
case llvm::Reloc::DynamicNoPIC:
return "dynamic-no-pic";
case llvm::Reloc::ROPI:
return "ropi";
case llvm::Reloc::RWPI:
return "rwpi";
case llvm::Reloc::ROPI_RWPI:
return "ropi-rwpi";
}
llvm_unreachable("Unknown Reloc::Model kind");
}
static void handleAMDGPUCodeObjectVersionOptions(const Driver &D,
const ArgList &Args,
ArgStringList &CmdArgs) {
// If no version was requested by the user, use the default value from the
// back end. This is consistent with the value returned from
// getAMDGPUCodeObjectVersion. This lets clang emit IR for amdgpu without
// requiring the corresponding llvm to have the AMDGPU target enabled,
// provided the user (e.g. front end tests) can use the default.
if (haveAMDGPUCodeObjectVersionArgument(D, Args)) {
unsigned CodeObjVer = getAMDGPUCodeObjectVersion(D, Args);
CmdArgs.insert(CmdArgs.begin() + 1,
Args.MakeArgString(Twine("--amdhsa-code-object-version=") +
Twine(CodeObjVer)));
CmdArgs.insert(CmdArgs.begin() + 1, "-mllvm");
}
}
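// For illustration: when a code object version is requested, the two inserts
// above splice "-mllvm" "--amdhsa-code-object-version=<N>" in right after the
// first element already present in CmdArgs.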
void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfo &Output,
const InputInfoList &Inputs) const {
const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
CheckPreprocessingOptions(D, Args);
Args.AddLastArg(CmdArgs, options::OPT_C);
Args.AddLastArg(CmdArgs, options::OPT_CC);
// Handle dependency file generation.
Arg *ArgM = Args.getLastArg(options::OPT_MM);
if (!ArgM)
ArgM = Args.getLastArg(options::OPT_M);
Arg *ArgMD = Args.getLastArg(options::OPT_MMD);
if (!ArgMD)
ArgMD = Args.getLastArg(options::OPT_MD);
// -M and -MM imply -w.
if (ArgM)
CmdArgs.push_back("-w");
else
ArgM = ArgMD;
if (ArgM) {
// Determine the output location.
const char *DepFile;
if (Arg *MF = Args.getLastArg(options::OPT_MF)) {
DepFile = MF->getValue();
C.addFailureResultFile(DepFile, &JA);
} else if (Output.getType() == types::TY_Dependencies) {
DepFile = Output.getFilename();
} else if (!ArgMD) {
DepFile = "-";
} else {
DepFile = getDependencyFileName(Args, Inputs);
C.addFailureResultFile(DepFile, &JA);
}
CmdArgs.push_back("-dependency-file");
CmdArgs.push_back(DepFile);
bool HasTarget = false;
for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) {
HasTarget = true;
A->claim();
if (A->getOption().matches(options::OPT_MT)) {
A->render(Args, CmdArgs);
} else {
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(A->getValue(), Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
}
// Add a default target if one wasn't specified.
if (!HasTarget) {
const char *DepTarget;
// If user provided -o, that is the dependency target, except
// when we are only generating a dependency file.
Arg *OutputOpt = Args.getLastArg(options::OPT_o);
if (OutputOpt && Output.getType() != types::TY_Dependencies) {
DepTarget = OutputOpt->getValue();
} else {
// Otherwise derive from the base input.
//
// FIXME: This should use the computed output file location.
SmallString<128> P(Inputs[0].getBaseInput());
llvm::sys::path::replace_extension(P, "o");
DepTarget = Args.MakeArgString(llvm::sys::path::filename(P));
}
CmdArgs.push_back("-MT");
SmallString<128> Quoted;
QuoteTarget(DepTarget, Quoted);
CmdArgs.push_back(Args.MakeArgString(Quoted));
}
if (ArgM->getOption().matches(options::OPT_M) ||
ArgM->getOption().matches(options::OPT_MD))
CmdArgs.push_back("-sys-header-deps");
if ((isa<PrecompileJobAction>(JA) &&
!Args.hasArg(options::OPT_fno_module_file_deps)) ||
Args.hasArg(options::OPT_fmodule_file_deps))
CmdArgs.push_back("-module-file-deps");
}
if (Args.hasArg(options::OPT_MG)) {
if (!ArgM || ArgM->getOption().matches(options::OPT_MD) ||
ArgM->getOption().matches(options::OPT_MMD))
D.Diag(diag::err_drv_mg_requires_m_or_mm);
CmdArgs.push_back("-MG");
}
Args.AddLastArg(CmdArgs, options::OPT_MP);
Args.AddLastArg(CmdArgs, options::OPT_MV);
// Add offload include arguments specific for CUDA/HIP. This must happen
// before we -I or -include anything else, because we must pick up the
// CUDA/HIP headers from the particular CUDA/ROCm installation, rather than
// from e.g. /usr/local/include.
if (JA.isOffloading(Action::OFK_Cuda))
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
if (JA.isOffloading(Action::OFK_HIP))
getToolChain().AddHIPIncludeArgs(Args, CmdArgs);
// If we are offloading to a target via OpenMP we need to include the
// openmp_wrappers folder which contains alternative system headers.
if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
- getToolChain().getTriple().isNVPTX()){
+ (getToolChain().getTriple().isNVPTX() ||
+ getToolChain().getTriple().isAMDGCN())) {
if (!Args.hasArg(options::OPT_nobuiltininc)) {
// Add openmp_wrappers/* to our system include path. This lets us wrap
// standard library headers.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
llvm::sys::path::append(P, "openmp_wrappers");
CmdArgs.push_back("-internal-isystem");
CmdArgs.push_back(Args.MakeArgString(P));
}
CmdArgs.push_back("-include");
CmdArgs.push_back("__clang_openmp_device_functions.h");
}
// Add -i* options, and automatically translate to -include-pch/-include-pth
// for transparent PCH support. It's wonky, but we also look for .gch files so
// that we can support seamless replacement in a build system that is already
// set up to generate .gch files.
if (getToolChain().getDriver().IsCLMode()) {
const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
if (YcArg && JA.getKind() >= Action::PrecompileJobClass &&
JA.getKind() <= Action::AssembleJobClass) {
CmdArgs.push_back(Args.MakeArgString("-building-pch-with-obj"));
// -fpch-instantiate-templates is the default when creating
// precomp using /Yc
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, true))
CmdArgs.push_back(Args.MakeArgString("-fpch-instantiate-templates"));
}
if (YcArg || YuArg) {
StringRef ThroughHeader = YcArg ? YcArg->getValue() : YuArg->getValue();
if (!isa<PrecompileJobAction>(JA)) {
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(D.GetClPchPath(
C, !ThroughHeader.empty()
? ThroughHeader
: llvm::sys::path::filename(Inputs[0].getBaseInput()))));
}
if (ThroughHeader.empty()) {
CmdArgs.push_back(Args.MakeArgString(
Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use")));
} else {
CmdArgs.push_back(
Args.MakeArgString(Twine("-pch-through-header=") + ThroughHeader));
}
}
}
bool RenderedImplicitInclude = false;
for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
if (A->getOption().matches(options::OPT_include)) {
// Handling of gcc-style gch precompiled headers.
bool IsFirstImplicitInclude = !RenderedImplicitInclude;
RenderedImplicitInclude = true;
bool FoundPCH = false;
SmallString<128> P(A->getValue());
// We want the files to have a name like foo.h.pch. Add a dummy extension
// so that replace_extension does the right thing.
P += ".dummy";
llvm::sys::path::replace_extension(P, "pch");
if (llvm::sys::fs::exists(P))
FoundPCH = true;
if (!FoundPCH) {
llvm::sys::path::replace_extension(P, "gch");
if (llvm::sys::fs::exists(P)) {
FoundPCH = true;
}
}
if (FoundPCH) {
if (IsFirstImplicitInclude) {
A->claim();
CmdArgs.push_back("-include-pch");
CmdArgs.push_back(Args.MakeArgString(P));
continue;
} else {
// Ignore the PCH if not first on command line and emit warning.
D.Diag(diag::warn_drv_pch_not_first_include) << P
<< A->getAsString(Args);
}
}
} else if (A->getOption().matches(options::OPT_isystem_after)) {
// Handling of paths which must come late. These entries are handled by
// the toolchain itself after the resource dir is inserted in the right
// search order.
// Do not claim the argument so that the use of the argument does not
// silently go unnoticed on toolchains which do not honour the option.
continue;
} else if (A->getOption().matches(options::OPT_stdlibxx_isystem)) {
// Translated to -internal-isystem by the driver, no need to pass to cc1.
continue;
}
// Not translated, render as usual.
A->claim();
A->render(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs,
{options::OPT_D, options::OPT_U, options::OPT_I_Group,
options::OPT_F, options::OPT_index_header_map});
// Add -Wp, and -Xpreprocessor if using the preprocessor.
// FIXME: There is a very unfortunate problem here, some troubled
// souls abuse -Wp, to pass preprocessor options in gcc syntax. To
// really support that we would have to parse and then translate
// those options. :(
Args.AddAllArgValues(CmdArgs, options::OPT_Wp_COMMA,
options::OPT_Xpreprocessor);
// -I- is a deprecated GCC feature, reject it.
if (Arg *A = Args.getLastArg(options::OPT_I_))
D.Diag(diag::err_drv_I_dash_not_supported) << A->getAsString(Args);
// If we have a --sysroot, and don't have an explicit -isysroot flag, add an
// -isysroot to the CC1 invocation.
StringRef sysroot = C.getSysRoot();
if (sysroot != "") {
if (!Args.hasArg(options::OPT_isysroot)) {
CmdArgs.push_back("-isysroot");
CmdArgs.push_back(C.getArgs().MakeArgString(sysroot));
}
}
// Parse additional include paths from environment variables.
// FIXME: We should probably sink the logic for handling these from the
// frontend into the driver. It will allow deleting 4 otherwise unused flags.
// CPATH - included following the user specified includes (but prior to
// builtin and standard includes).
addDirectoryList(Args, CmdArgs, "-I", "CPATH");
// C_INCLUDE_PATH - system includes enabled when compiling C.
addDirectoryList(Args, CmdArgs, "-c-isystem", "C_INCLUDE_PATH");
// CPLUS_INCLUDE_PATH - system includes enabled when compiling C++.
addDirectoryList(Args, CmdArgs, "-cxx-isystem", "CPLUS_INCLUDE_PATH");
// OBJC_INCLUDE_PATH - system includes enabled when compiling ObjC.
addDirectoryList(Args, CmdArgs, "-objc-isystem", "OBJC_INCLUDE_PATH");
// OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++.
addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH");
// While adding the include arguments, we also attempt to retrieve the
// arguments of related offloading toolchains or arguments that are specific
// of an offloading programming model.
// Add C++ include arguments, if needed.
if (types::isCXX(Inputs[0].getType())) {
bool HasStdlibxxIsystem = Args.hasArg(options::OPT_stdlibxx_isystem);
forAllAssociatedToolChains(
C, JA, getToolChain(),
[&Args, &CmdArgs, HasStdlibxxIsystem](const ToolChain &TC) {
HasStdlibxxIsystem ? TC.AddClangCXXStdlibIsystemArgs(Args, CmdArgs)
: TC.AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
});
}
// Add system include arguments for all targets but IAMCU.
if (!IsIAMCU)
forAllAssociatedToolChains(C, JA, getToolChain(),
[&Args, &CmdArgs](const ToolChain &TC) {
TC.AddClangSystemIncludeArgs(Args, CmdArgs);
});
else {
// For IAMCU add special include arguments.
getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs);
}
addMacroPrefixMapArg(D, Args, CmdArgs);
addCoveragePrefixMapArg(D, Args, CmdArgs);
}
// FIXME: Move to target hook.
static bool isSignedCharDefault(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
default:
return true;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
if (Triple.isOSDarwin() || Triple.isOSWindows())
return true;
return false;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
return true;
return false;
case llvm::Triple::hexagon:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::systemz:
case llvm::Triple::xcore:
return false;
}
}
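// For illustration, per the switch above: plain 'char' defaults to signed on
// x86 (the default case) and on Darwin/Windows ARM and AArch64 targets, and
// to unsigned on e.g. aarch64-linux, ppc64le, riscv64 and systemz.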
static bool hasMultipleInvocations(const llvm::Triple &Triple,
const ArgList &Args) {
// Supported only on Darwin where we invoke the compiler multiple times
// followed by an invocation to lipo.
if (!Triple.isOSDarwin())
return false;
// If more than one "-arch <arch>" is specified, we're targeting multiple
// architectures resulting in a fat binary.
return Args.getAllArgValues(options::OPT_arch).size() > 1;
}
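// For illustration: on Darwin, "-arch x86_64 -arch arm64" yields two values
// for OPT_arch, so this returns true; on any non-Darwin triple it always
// returns false.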
static bool checkRemarksOptions(const Driver &D, const ArgList &Args,
const llvm::Triple &Triple) {
// When enabling remarks, we need to error if:
// * The remark file is specified but we're targeting multiple architectures,
// which means more than one remark file is being generated.
bool hasMultipleInvocations = ::hasMultipleInvocations(Triple, Args);
bool hasExplicitOutputFile =
Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (hasMultipleInvocations && hasExplicitOutputFile) {
D.Diag(diag::err_drv_invalid_output_with_multiple_archs)
<< "-foptimization-record-file";
return false;
}
return true;
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output, const JobAction &JA) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
CmdArgs.push_back("-opt-record-file");
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A) {
CmdArgs.push_back(A->getValue());
} else {
bool hasMultipleArchs =
Triple.isOSDarwin() && // Only supported on Darwin platforms.
Args.getAllArgValues(options::OPT_arch).size() > 1;
SmallString<128> F;
if (Args.hasArg(options::OPT_c) || Args.hasArg(options::OPT_S)) {
if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
F = FinalOutput->getValue();
} else {
if (Format != "yaml" && // For YAML, keep the original behavior.
Triple.isOSDarwin() && // Enable this only on Darwin, since it's the only
// platform supporting .dSYM bundles.
Output.isFilename())
F = Output.getFilename();
}
if (F.empty()) {
// Use the input filename.
F = llvm::sys::path::stem(Input.getBaseInput());
// If we're compiling for an offload architecture (i.e. a CUDA device),
// we need to make the file name for the device compilation different
// from the host compilation.
if (!JA.isDeviceOffloading(Action::OFK_None) &&
!JA.isDeviceOffloading(Action::OFK_Host)) {
llvm::sys::path::replace_extension(F, "");
F += Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(),
Triple.normalize());
F += "-";
F += JA.getOffloadingArch();
}
}
// If more than one "-arch" is given, name the files differently so that
// every cc1 invocation writes to a different file. We do that by appending
// "-<arch>", with "<arch>" being the arch name from the triple.
if (hasMultipleArchs) {
// First, remember the extension.
SmallString<64> OldExtension = llvm::sys::path::extension(F);
// then, remove it.
llvm::sys::path::replace_extension(F, "");
// attach -<arch> to it.
F += "-";
F += Triple.getArchName();
// put back the extension.
llvm::sys::path::replace_extension(F, OldExtension);
}
SmallString<32> Extension;
Extension += "opt.";
Extension += Format;
llvm::sys::path::replace_extension(F, Extension);
CmdArgs.push_back(Args.MakeArgString(F));
}
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ)) {
CmdArgs.push_back("-opt-record-passes");
CmdArgs.push_back(A->getValue());
}
if (!Format.empty()) {
CmdArgs.push_back("-opt-record-format");
CmdArgs.push_back(Format.data());
}
}
void AddAAPCSVolatileBitfieldArgs(const ArgList &Args, ArgStringList &CmdArgs) {
if (!Args.hasFlag(options::OPT_faapcs_bitfield_width,
options::OPT_fno_aapcs_bitfield_width, true))
CmdArgs.push_back("-fno-aapcs-bitfield-width");
if (Args.getLastArg(options::OPT_ForceAAPCSBitfieldLoad))
CmdArgs.push_back("-faapcs-bitfield-load");
}
namespace {
void RenderARMABI(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs) {
// Select the ABI to use.
// FIXME: Support -meabi.
// FIXME: Parts of this are duplicated in the backend, unify this somehow.
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
} else {
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
ABIName = llvm::ARM::computeDefaultTargetABI(Triple, CPU).data();
}
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(Triple, Args, CmdArgs);
// Determine floating point ABI from the options & target defaults.
arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args);
if (ABI == arm::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
// FIXME: This changes CPP defines, we need -target-soft-float.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else if (ABI == arm::FloatABI::SoftFP) {
// Floating point operations are hard, but argument passing is soft.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == arm::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-arm-global-merge=false");
else
CmdArgs.push_back("-arm-global-merge=true");
}
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
if (Args.getLastArg(options::OPT_mcmse))
CmdArgs.push_back("-mcmse");
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
}
void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
const ArgList &Args, bool KernelOrKext,
ArgStringList &CmdArgs) const {
const ToolChain &TC = getToolChain();
// Add the target features
getTargetFeatures(TC.getDriver(), EffectiveTriple, Args, CmdArgs, false);
// Add target specific flags.
switch (TC.getArch()) {
default:
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Use the effective triple, which takes into account the deployment target.
AddARMTargetArgs(EffectiveTriple, Args, CmdArgs, KernelOrKext);
CmdArgs.push_back("-fallow-half-arguments-and-returns");
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
AddAArch64TargetArgs(Args, CmdArgs);
CmdArgs.push_back("-fallow-half-arguments-and-returns");
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
AddPPCTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
AddSparcTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::systemz:
AddSystemZTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::lanai:
AddLanaiTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
AddWebAssemblyTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ve:
AddVETargetArgs(Args, CmdArgs);
break;
}
}
namespace {
void RenderAArch64ABI(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs) {
const char *ABIName = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
ABIName = A->getValue();
else if (Triple.isOSDarwin())
ABIName = "darwinpcs";
else
ABIName = "aapcs";
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
void Clang::AddAArch64TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mimplicit_float,
options::OPT_mno_implicit_float, true))
CmdArgs.push_back("-no-implicit-float");
RenderAArch64ABI(Triple, Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a53_835769,
options::OPT_mno_fix_cortex_a53_835769)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mfix_cortex_a53_835769))
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1");
else
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=0");
} else if (Triple.isAndroid()) {
// Enable the A53 erratum (835769) workaround by default on Android.
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-aarch64-fix-cortex-a53-835769=1");
}
// Forward the -mglobal-merge option for explicit control over the pass.
if (Arg *A = Args.getLastArg(options::OPT_mglobal_merge,
options::OPT_mno_global_merge)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_mno_global_merge))
CmdArgs.push_back("-aarch64-enable-global-merge=false");
else
CmdArgs.push_back("-aarch64-enable-global-merge=true");
}
// Enable/disable return address signing and indirect branch targets.
if (Arg *A = Args.getLastArg(options::OPT_msign_return_address_EQ,
options::OPT_mbranch_protection_EQ)) {
const Driver &D = getToolChain().getDriver();
StringRef Scope, Key;
bool IndirectBranches;
if (A->getOption().matches(options::OPT_msign_return_address_EQ)) {
Scope = A->getValue();
if (!Scope.equals("none") && !Scope.equals("non-leaf") &&
!Scope.equals("all"))
D.Diag(diag::err_invalid_branch_protection)
<< Scope << A->getAsString(Args);
Key = "a_key";
IndirectBranches = false;
} else {
StringRef Err;
llvm::AArch64::ParsedBranchProtection PBP;
if (!llvm::AArch64::parseBranchProtection(A->getValue(), PBP, Err))
D.Diag(diag::err_invalid_branch_protection)
<< Err << A->getAsString(Args);
Scope = PBP.Scope;
Key = PBP.Key;
IndirectBranches = PBP.BranchTargetEnforcement;
}
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address=") + Scope));
CmdArgs.push_back(
Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
if (IndirectBranches)
CmdArgs.push_back("-mbranch-target-enforce");
}
// Handle -msve_vector_bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
StringRef Val = A->getValue();
const Driver &D = getToolChain().getDriver();
if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
Val.equals("1024") || Val.equals("2048"))
CmdArgs.push_back(
Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
// Silently drop requests for vector-length agnostic code as it's implied.
else if (!Val.equals("scalable"))
// Handle the unsupported values passed to msve-vector-bits.
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
}
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
}
void Clang::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
mips::FloatABI ABI = mips::getMipsFloatABI(D, Args, Triple);
if (ABI == mips::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(ABI == mips::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (Arg *A = Args.getLastArg(options::OPT_mldc1_sdc1,
options::OPT_mno_ldc1_sdc1)) {
if (A->getOption().matches(options::OPT_mno_ldc1_sdc1)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-ldc1-sdc1");
}
}
if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division,
options::OPT_mno_check_zero_division)) {
if (A->getOption().matches(options::OPT_mno_check_zero_division)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mno-check-zero-division");
}
}
if (Arg *A = Args.getLastArg(options::OPT_G)) {
StringRef v = A->getValue();
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v));
A->claim();
}
Arg *GPOpt = Args.getLastArg(options::OPT_mgpopt, options::OPT_mno_gpopt);
Arg *ABICalls =
Args.getLastArg(options::OPT_mabicalls, options::OPT_mno_abicalls);
// -mabicalls is the default for many MIPS environments, even with -fno-pic.
// -mgpopt is the default for static, -fno-pic environments but these two
// options conflict. We want to be certain that -mno-abicalls -mgpopt is
// the only case where -mllvm -mgpopt is passed.
// NOTE: We need a warning here or in the backend to warn when -mgpopt is
// passed explicitly when compiling something with -mabicalls
// (implicitly) in effect. Currently the warning is in the backend.
//
// When the ABI in use is N64, we also need to determine the PIC mode that
// is in use, as -fno-pic for N64 implies -mno-abicalls.
bool NoABICalls =
ABICalls && ABICalls->getOption().matches(options::OPT_mno_abicalls);
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
NoABICalls = NoABICalls ||
(RelocationModel == llvm::Reloc::Static && ABIName == "n64");
bool WantGPOpt = GPOpt && GPOpt->getOption().matches(options::OPT_mgpopt);
// We quietly ignore -mno-gpopt as the backend defaults to -mno-gpopt.
if (NoABICalls && (!GPOpt || WantGPOpt)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mgpopt");
Arg *LocalSData = Args.getLastArg(options::OPT_mlocal_sdata,
options::OPT_mno_local_sdata);
Arg *ExternSData = Args.getLastArg(options::OPT_mextern_sdata,
options::OPT_mno_extern_sdata);
Arg *EmbeddedData = Args.getLastArg(options::OPT_membedded_data,
options::OPT_mno_embedded_data);
if (LocalSData) {
CmdArgs.push_back("-mllvm");
if (LocalSData->getOption().matches(options::OPT_mlocal_sdata)) {
CmdArgs.push_back("-mlocal-sdata=1");
} else {
CmdArgs.push_back("-mlocal-sdata=0");
}
LocalSData->claim();
}
if (ExternSData) {
CmdArgs.push_back("-mllvm");
if (ExternSData->getOption().matches(options::OPT_mextern_sdata)) {
CmdArgs.push_back("-mextern-sdata=1");
} else {
CmdArgs.push_back("-mextern-sdata=0");
}
ExternSData->claim();
}
if (EmbeddedData) {
CmdArgs.push_back("-mllvm");
if (EmbeddedData->getOption().matches(options::OPT_membedded_data)) {
CmdArgs.push_back("-membedded-data=1");
} else {
CmdArgs.push_back("-membedded-data=0");
}
EmbeddedData->claim();
}
} else if ((!ABICalls || (!NoABICalls && ABICalls)) && WantGPOpt)
D.Diag(diag::warn_drv_unsupported_gpopt) << (ABICalls ? 0 : 1);
if (GPOpt)
GPOpt->claim();
if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) {
StringRef Val = StringRef(A->getValue());
if (mips::hasCompactBranches(CPUName)) {
if (Val == "never" || Val == "always" || Val == "optimal") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val));
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
} else
D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName;
}
if (Arg *A = Args.getLastArg(options::OPT_mrelax_pic_calls,
options::OPT_mno_relax_pic_calls)) {
if (A->getOption().matches(options::OPT_mno_relax_pic_calls)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-mips-jalr-reloc=0");
}
}
}
void Clang::AddPPCTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Select the ABI to use.
const char *ABIName = nullptr;
const llvm::Triple &T = getToolChain().getTriple();
if (T.isOSBinFormatELF()) {
switch (getToolChain().getArch()) {
case llvm::Triple::ppc64: {
if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) ||
T.isOSOpenBSD() || T.isMusl())
ABIName = "elfv2";
else
ABIName = "elfv1";
break;
}
case llvm::Triple::ppc64le:
ABIName = "elfv2";
break;
default:
break;
}
}
bool IEEELongDouble = false;
for (const Arg *A : Args.filtered(options::OPT_mabi_EQ)) {
StringRef V = A->getValue();
if (V == "ieeelongdouble")
IEEELongDouble = true;
else if (V == "ibmlongdouble")
IEEELongDouble = false;
else if (V != "altivec")
// The ppc64 Linux ABIs are all "altivec" ABIs by default. Accept and ignore
// the option if given, as we don't have backend support for any targets
// that don't use the altivec ABI.
ABIName = A->getValue();
}
if (IEEELongDouble)
CmdArgs.push_back("-mabi=ieeelongdouble");
ppc::FloatABI FloatABI =
ppc::getPPCFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == ppc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == ppc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
if (ABIName) {
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName);
}
}
static void SetRISCVSmallDataLimit(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const Driver &D = TC.getDriver();
const llvm::Triple &Triple = TC.getTriple();
// The default small data limit is eight bytes.
const char *SmallDataLimit = "8";
// Determine the small data limit from the options.
if (Args.getLastArg(options::OPT_shared, options::OPT_fpic,
options::OPT_fPIC)) {
// Linker relaxation is not supported for PIC.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Args.getLastArgValue(options::OPT_mcmodel_EQ)
.equals_insensitive("large") &&
(Triple.getArch() == llvm::Triple::riscv64)) {
// Linker relaxation is not supported for RV64 with the large code model.
SmallDataLimit = "0";
if (Args.hasArg(options::OPT_G)) {
D.Diag(diag::warn_drv_unsupported_sdata);
}
} else if (Arg *A = Args.getLastArg(options::OPT_G)) {
SmallDataLimit = A->getValue();
}
// Forward the -msmall-data-limit= option.
CmdArgs.push_back("-msmall-data-limit");
CmdArgs.push_back(SmallDataLimit);
}
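// For illustration: with -fPIC (or -fpic/-shared), or with -mcmodel=large on
// riscv64, the limit is forced to "0" and any -G value triggers
// warn_drv_unsupported_sdata; otherwise "-G 16" forwards
// "-msmall-data-limit" "16", and the default remains "8".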
void Clang::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
SetRISCVSmallDataLimit(getToolChain(), Args, CmdArgs);
std::string TuneCPU;
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
Name = llvm::RISCV::resolveTuneCPUAlias(Name, Triple.isArch64Bit());
TuneCPU = std::string(Name);
}
if (!TuneCPU.empty()) {
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddSparcTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
sparc::FloatABI FloatABI =
sparc::getSparcFloatABI(getToolChain().getDriver(), Args);
if (FloatABI == sparc::FloatABI::Soft) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
} else {
// Floating point operations and argument passing are hard.
assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
}
void Clang::AddSystemZTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
bool HasBackchain = Args.hasFlag(options::OPT_mbackchain,
options::OPT_mno_backchain, false);
bool HasPackedStack = Args.hasFlag(options::OPT_mpacked_stack,
options::OPT_mno_packed_stack, false);
systemz::FloatABI FloatABI =
systemz::getSystemZFloatABI(getToolChain().getDriver(), Args);
bool HasSoftFloat = (FloatABI == systemz::FloatABI::Soft);
if (HasBackchain && HasPackedStack && !HasSoftFloat) {
const Driver &D = getToolChain().getDriver();
D.Diag(diag::err_drv_unsupported_opt)
<< "-mpacked-stack -mbackchain -mhard-float";
}
if (HasBackchain)
CmdArgs.push_back("-mbackchain");
if (HasPackedStack)
CmdArgs.push_back("-mpacked-stack");
if (HasSoftFloat) {
// Floating point operations and argument passing are soft.
CmdArgs.push_back("-msoft-float");
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
}
}
void Clang::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/false);
if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext))
CmdArgs.push_back("-disable-red-zone");
if (!Args.hasFlag(options::OPT_mtls_direct_seg_refs,
options::OPT_mno_tls_direct_seg_refs, true))
CmdArgs.push_back("-mno-tls-direct-seg-refs");
// Default to avoid implicit floating-point for kernel/kext code, but allow
// that to be overridden with -mno-soft-float.
bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) ||
Args.hasArg(options::OPT_fapple_kext));
if (Arg *A = Args.getLastArg(
options::OPT_msoft_float, options::OPT_mno_soft_float,
options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) {
const Option &O = A->getOption();
NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) ||
O.matches(options::OPT_msoft_float));
}
if (NoImplicitFloat)
CmdArgs.push_back("-no-implicit-float");
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
} else if (D.IsCLMode()) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-x86-asm-syntax=intel");
}
// Set flags to support MCU ABI.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("soft");
CmdArgs.push_back("-mstack-alignment=4");
}
// Handle -mtune.
// Default to "generic" unless -march is present or targetting the PS4.
std::string TuneCPU;
if (!Args.hasArg(clang::driver::options::OPT_march_EQ) &&
!getToolChain().getTriple().isPS4CPU())
TuneCPU = "generic";
// Override based on -mtune.
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
StringRef Name = A->getValue();
if (Name == "native") {
Name = llvm::sys::getHostCPUName();
if (!Name.empty())
TuneCPU = std::string(Name);
} else
TuneCPU = std::string(Name);
}
if (!TuneCPU.empty()) {
CmdArgs.push_back("-tune-cpu");
CmdArgs.push_back(Args.MakeArgString(TuneCPU));
}
}
void Clang::AddHexagonTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
CmdArgs.push_back("-mqdsp6-compat");
CmdArgs.push_back("-Wreturn-type");
if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" +
Twine(G.getValue())));
}
if (!Args.hasArg(options::OPT_fno_short_enums))
CmdArgs.push_back("-fshort-enums");
if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-hexagon-ieee-rnd-near");
}
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-machine-sink-split=0");
}
void Clang::AddLanaiTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUName = A->getValue();
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPUName));
}
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
StringRef Value = A->getValue();
// Only support mregparm=4 to support old usage. Report error for all other
// cases.
int Mregparm;
if (Value.getAsInteger(10, Mregparm) || Mregparm != 4) {
getToolChain().getDriver().Diag(
diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
void Clang::AddWebAssemblyTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Default to "hidden" visibility.
if (!Args.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back("hidden");
}
}
void Clang::AddVETargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
// Floating point operations and argument passing are hard.
CmdArgs.push_back("-mfloat-abi");
CmdArgs.push_back("hard");
}
void Clang::DumpCompilationDatabase(Compilation &C, StringRef Filename,
StringRef Target, const InputInfo &Output,
const InputInfo &Input, const ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
using llvm::yaml::escape;
const Driver &D = getToolChain().getDriver();
if (!CompilationDatabase) {
std::error_code EC;
auto File = std::make_unique<llvm::raw_fd_ostream>(
Filename, EC, llvm::sys::fs::OF_TextWithCRLF);
if (EC) {
D.Diag(clang::diag::err_drv_compilationdatabase) << Filename
<< EC.message();
return;
}
CompilationDatabase = std::move(File);
}
auto &CDB = *CompilationDatabase;
auto CWD = D.getVFS().getCurrentWorkingDirectory();
if (!CWD)
CWD = ".";
CDB << "{ \"directory\": \"" << escape(*CWD) << "\"";
CDB << ", \"file\": \"" << escape(Input.getFilename()) << "\"";
CDB << ", \"output\": \"" << escape(Output.getFilename()) << "\"";
CDB << ", \"arguments\": [\"" << escape(D.ClangExecutable) << "\"";
SmallString<128> Buf;
Buf = "-x";
Buf += types::getTypeName(Input.getType());
CDB << ", \"" << escape(Buf) << "\"";
if (!D.SysRoot.empty() && !Args.hasArg(options::OPT__sysroot_EQ)) {
Buf = "--sysroot=";
Buf += D.SysRoot;
CDB << ", \"" << escape(Buf) << "\"";
}
CDB << ", \"" << escape(Input.getFilename()) << "\"";
for (auto &A: Args) {
auto &O = A->getOption();
// Skip language selection, which is positional.
if (O.getID() == options::OPT_x)
continue;
// Skip writing dependency output and the compilation database itself.
if (O.getGroup().isValid() && O.getGroup().getID() == options::OPT_M_Group)
continue;
if (O.getID() == options::OPT_gen_cdb_fragment_path)
continue;
// Skip inputs.
if (O.getKind() == Option::InputClass)
continue;
// All other arguments are quoted and appended.
ArgStringList ASL;
A->render(Args, ASL);
for (auto &it: ASL)
CDB << ", \"" << escape(it) << "\"";
}
Buf = "--target=";
Buf += Target;
CDB << ", \"" << escape(Buf) << "\"]},\n";
}
void Clang::DumpCompilationDatabaseFragmentToDir(
StringRef Dir, Compilation &C, StringRef Target, const InputInfo &Output,
const InputInfo &Input, const llvm::opt::ArgList &Args) const {
// If this is a dry run, do not create the compilation database file.
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
return;
if (CompilationDatabase)
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
SmallString<256> Path = Dir;
const auto &Driver = C.getDriver();
Driver.getVFS().makeAbsolute(Path);
auto Err = llvm::sys::fs::create_directory(Path, /*IgnoreExisting=*/true);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Dir << Err.message();
return;
}
llvm::sys::path::append(
Path,
Twine(llvm::sys::path::filename(Input.getFilename())) + ".%%%%.json");
int FD;
SmallString<256> TempPath;
Err = llvm::sys::fs::createUniqueFile(Path, FD, TempPath,
llvm::sys::fs::OF_Text);
if (Err) {
Driver.Diag(diag::err_drv_compilationdatabase) << Path << Err.message();
return;
}
CompilationDatabase =
std::make_unique<llvm::raw_fd_ostream>(FD, /*shouldClose=*/true);
DumpCompilationDatabase(C, "", Target, Output, Input, Args);
}
static bool CheckARMImplicitITArg(StringRef Value) {
return Value == "always" || Value == "never" || Value == "arm" ||
Value == "thumb";
}
static void AddARMImplicitITArgs(const ArgList &Args, ArgStringList &CmdArgs,
StringRef Value) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-arm-implicit-it=" + Value));
}
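// For illustration: CheckARMImplicitITArg accepts exactly "always", "never",
// "arm" and "thumb"; a valid value such as "thumb" is forwarded to the
// backend as the pair "-mllvm" "-arm-implicit-it=thumb".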
static void CollectArgsForIntegratedAssembler(Compilation &C,
const ArgList &Args,
ArgStringList &CmdArgs,
const Driver &D) {
if (UseRelaxAll(C, Args))
CmdArgs.push_back("-mrelax-all");
// Only default to -mincremental-linker-compatible if we think we are
// targeting the MSVC linker.
bool DefaultIncrementalLinkerCompatible =
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
if (Args.hasFlag(options::OPT_mincremental_linker_compatible,
options::OPT_mno_incremental_linker_compatible,
DefaultIncrementalLinkerCompatible))
CmdArgs.push_back("-mincremental-linker-compatible");
// If you add more args here, also add them to the block below that
// starts with "// If CollectArgsForIntegratedAssembler() isn't called below".
// When passing -I arguments to the assembler we sometimes need to
// unconditionally take the next argument. For example, when parsing
// '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
// -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
// arg after parsing the '-I' arg.
bool TakeNextArg = false;
bool UseRelaxRelocations = C.getDefaultToolChain().useRelaxRelocations();
bool UseNoExecStack = C.getDefaultToolChain().isNoExecStackDefault();
const char *MipsTargetFeature = nullptr;
StringRef ImplicitIt;
for (const Arg *A :
Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler,
options::OPT_mimplicit_it_EQ)) {
A->claim();
if (A->getOption().getID() == options::OPT_mimplicit_it_EQ) {
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// Only store the value; the last value set takes effect.
ImplicitIt = A->getValue();
if (!CheckARMImplicitITArg(ImplicitIt))
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << ImplicitIt;
continue;
default:
break;
}
}
for (StringRef Value : A->getValues()) {
if (TakeNextArg) {
CmdArgs.push_back(Value.data());
TakeNextArg = false;
continue;
}
if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
Value == "-mbig-obj")
continue; // LLVM handles bigobj automatically
switch (C.getDefaultToolChain().getArch()) {
default:
break;
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::arm:
case llvm::Triple::armeb:
if (Value.startswith("-mimplicit-it=")) {
// Only store the value; the last value set takes effect.
ImplicitIt = Value.split("=").second;
if (CheckARMImplicitITArg(ImplicitIt))
continue;
}
if (Value == "-mthumb")
// -mthumb has already been processed in ComputeLLVMTriple();
// recognize it here but skip over it.
continue;
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
if (Value == "--trap") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+use-tcc-in-div");
continue;
}
if (Value == "--break") {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-use-tcc-in-div");
continue;
}
if (Value.startswith("-msoft-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+soft-float");
continue;
}
if (Value.startswith("-mhard-float")) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-soft-float");
continue;
}
MipsTargetFeature = llvm::StringSwitch<const char *>(Value)
.Case("-mips1", "+mips1")
.Case("-mips2", "+mips2")
.Case("-mips3", "+mips3")
.Case("-mips4", "+mips4")
.Case("-mips5", "+mips5")
.Case("-mips32", "+mips32")
.Case("-mips32r2", "+mips32r2")
.Case("-mips32r3", "+mips32r3")
.Case("-mips32r5", "+mips32r5")
.Case("-mips32r6", "+mips32r6")
.Case("-mips64", "+mips64")
.Case("-mips64r2", "+mips64r2")
.Case("-mips64r3", "+mips64r3")
.Case("-mips64r5", "+mips64r5")
.Case("-mips64r6", "+mips64r6")
.Default(nullptr);
if (MipsTargetFeature)
continue;
}
if (Value == "-force_cpusubtype_ALL") {
// Do nothing, this is the default and we don't support anything else.
} else if (Value == "-L") {
CmdArgs.push_back("-msave-temp-labels");
} else if (Value == "--fatal-warnings") {
CmdArgs.push_back("-massembler-fatal-warnings");
} else if (Value == "--no-warn" || Value == "-W") {
CmdArgs.push_back("-massembler-no-warn");
} else if (Value == "--noexecstack") {
UseNoExecStack = true;
} else if (Value.startswith("-compress-debug-sections") ||
Value.startswith("--compress-debug-sections") ||
Value == "-nocompress-debug-sections" ||
Value == "--nocompress-debug-sections") {
CmdArgs.push_back(Value.data());
} else if (Value == "-mrelax-relocations=yes" ||
Value == "--mrelax-relocations=yes") {
UseRelaxRelocations = true;
} else if (Value == "-mrelax-relocations=no" ||
Value == "--mrelax-relocations=no") {
UseRelaxRelocations = false;
} else if (Value.startswith("-I")) {
CmdArgs.push_back(Value.data());
// We need to consume the next argument if the current arg is a plain
// -I. The next arg will be the include directory.
if (Value == "-I")
TakeNextArg = true;
} else if (Value.startswith("-gdwarf-")) {
// "-gdwarf-N" options are not cc1as options.
unsigned DwarfVersion = DwarfVersionNum(Value);
if (DwarfVersion == 0) { // Send it onward, and let cc1as complain.
CmdArgs.push_back(Value.data());
} else {
RenderDebugEnablingArgs(Args, CmdArgs,
codegenoptions::DebugInfoConstructor,
DwarfVersion, llvm::DebuggerKind::Default);
}
} else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
Value.startswith("-mhwdiv") || Value.startswith("-march")) {
// Do nothing, we'll validate it later.
} else if (Value == "-defsym") {
if (A->getNumValues() != 2) {
D.Diag(diag::err_drv_defsym_invalid_format) << Value;
break;
}
const char *S = A->getValue(1);
auto Pair = StringRef(S).split('=');
auto Sym = Pair.first;
auto SVal = Pair.second;
if (Sym.empty() || SVal.empty()) {
D.Diag(diag::err_drv_defsym_invalid_format) << S;
break;
}
int64_t IVal;
if (SVal.getAsInteger(0, IVal)) {
D.Diag(diag::err_drv_defsym_invalid_symval) << SVal;
break;
}
CmdArgs.push_back(Value.data());
TakeNextArg = true;
} else if (Value == "-fdebug-compilation-dir") {
CmdArgs.push_back("-fdebug-compilation-dir");
TakeNextArg = true;
} else if (Value.consume_front("-fdebug-compilation-dir=")) {
// The flag is a -Wa / -Xassembler argument and Options doesn't
// parse the argument, so this isn't automatically aliased to
// -fdebug-compilation-dir (without '=') here.
CmdArgs.push_back("-fdebug-compilation-dir");
CmdArgs.push_back(Value.data());
} else if (Value == "--version") {
D.PrintVersion(C, llvm::outs());
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
if (ImplicitIt.size())
AddARMImplicitITArgs(Args, CmdArgs, ImplicitIt);
if (UseRelaxRelocations)
CmdArgs.push_back("--mrelax-relocations");
if (UseNoExecStack)
CmdArgs.push_back("-mnoexecstack");
if (MipsTargetFeature != nullptr) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back(MipsTargetFeature);
}
// Forward -fembed-bitcode to the assembler.
if (C.getDriver().embedBitcodeEnabled() ||
C.getDriver().embedBitcodeMarkerOnly())
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
}
static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &Args,
ArgStringList &CmdArgs,
const JobAction &JA) {
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. This is complicated by several
// "umbrella" flags, so we do this by stepping through the flags incrementally
// adjusting what we think is enabled/disabled, then at the end setting the
// LLVM flags based on the final state.
bool HonorINFs = true;
bool HonorNaNs = true;
// -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
bool MathErrno = TC.IsMathErrnoDefault();
bool AssociativeMath = false;
bool ReciprocalMath = false;
bool SignedZeros = true;
bool TrappingMath = false; // Implemented via -ffp-exception-behavior
bool TrappingMathPresent = false; // Is trapping-math in args, and not
// overridden by -ffp-exception-behavior?
bool RoundingFPMath = false;
bool RoundingMathPresent = false; // Is rounding-math in args?
// -ffp-model values: strict, fast, precise
StringRef FPModel = "";
// -ffp-exception-behavior options: strict, maytrap, ignore
StringRef FPExceptionBehavior = "";
const llvm::DenormalMode DefaultDenormalFPMath =
TC.getDefaultDenormalModeForType(Args, JA);
const llvm::DenormalMode DefaultDenormalFP32Math =
TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
StringRef FPContract = "";
bool StrictFPModel = false;
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
CmdArgs.push_back(A->getValue());
}
for (const Arg *A : Args) {
auto optID = A->getOption().getID();
bool PreciseFPModel = false;
switch (optID) {
default:
break;
case options::OPT_ffp_model_EQ: {
// If -ffp-model= is seen, reset to fno-fast-math
HonorINFs = true;
HonorNaNs = true;
// Turning *off* -ffast-math restores the toolchain default.
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
// -fno-fast-math restores the default denormal and fp-contract handling.
FPContract = "";
DenormalFPMath = llvm::DenormalMode::getIEEE();
// FIXME: The target may have picked a non-IEEE default mode here based on
// -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = llvm::DenormalMode::getIEEE();
StringRef Val = A->getValue();
if (OFastEnabled && !Val.equals("fast")) {
// Only -ffp-model=fast is compatible with OFast, ignore.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + Val)
<< "-Ofast";
break;
}
StrictFPModel = false;
PreciseFPModel = true;
// -ffp-model= is a Driver option; it is entirely rewritten into more
// granular options before being passed to cc1.
// Use the GCC-style option in the switch below.
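// Roughly: fast    -> handled as -ffast-math plus -ffp-contract=fast,
//          precise -> -ffp-contract=fast,
//          strict  -> -frounding-math, -ffp-contract=off and
//                     -ffp-exception-behavior=strict.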
if (!FPModel.empty() && !FPModel.equals(Val)) {
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + FPModel)
<< Args.MakeArgString("-ffp-model=" + Val);
FPContract = "";
}
if (Val.equals("fast")) {
optID = options::OPT_ffast_math;
FPModel = Val;
FPContract = "fast";
} else if (Val.equals("precise")) {
optID = options::OPT_ffp_contract;
FPModel = Val;
FPContract = "fast";
PreciseFPModel = true;
} else if (Val.equals("strict")) {
StrictFPModel = true;
optID = options::OPT_frounding_math;
FPExceptionBehavior = "strict";
FPModel = Val;
FPContract = "off";
TrappingMath = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
}
switch (optID) {
// If this isn't an FP option, skip the claim below.
default: continue;
// Options controlling individual features
case options::OPT_fhonor_infinities: HonorINFs = true; break;
case options::OPT_fno_honor_infinities: HonorINFs = false; break;
case options::OPT_fhonor_nans: HonorNaNs = true; break;
case options::OPT_fno_honor_nans: HonorNaNs = false; break;
case options::OPT_fmath_errno: MathErrno = true; break;
case options::OPT_fno_math_errno: MathErrno = false; break;
case options::OPT_fassociative_math: AssociativeMath = true; break;
case options::OPT_fno_associative_math: AssociativeMath = false; break;
case options::OPT_freciprocal_math: ReciprocalMath = true; break;
case options::OPT_fno_reciprocal_math: ReciprocalMath = false; break;
case options::OPT_fsigned_zeros: SignedZeros = true; break;
case options::OPT_fno_signed_zeros: SignedZeros = false; break;
case options::OPT_ftrapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("strict"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-ftrapping-math";
TrappingMath = true;
TrappingMathPresent = true;
FPExceptionBehavior = "strict";
break;
case options::OPT_fno_trapping_math:
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals("ignore"))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< "-fno-trapping-math";
TrappingMath = false;
TrappingMathPresent = true;
FPExceptionBehavior = "ignore";
break;
case options::OPT_frounding_math:
RoundingFPMath = true;
RoundingMathPresent = true;
break;
case options::OPT_fno_rounding_math:
RoundingFPMath = false;
RoundingMathPresent = false;
break;
case options::OPT_fdenormal_fp_math_EQ:
DenormalFPMath = llvm::parseDenormalFPAttribute(A->getValue());
if (!DenormalFPMath.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
case options::OPT_fdenormal_fp_math_f32_EQ:
DenormalFP32Math = llvm::parseDenormalFPAttribute(A->getValue());
if (!DenormalFP32Math.isValid()) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
break;
// Validate and pass through -ffp-contract option.
case options::OPT_ffp_contract: {
StringRef Val = A->getValue();
if (PreciseFPModel) {
// -ffp-model=precise enables -ffp-contract=fast as a side effect;
// the FPContract value has already been set to a string literal
// and the Val string isn't a pertinent value.
;
} else if (Val.equals("fast") || Val.equals("on") || Val.equals("off"))
FPContract = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
// Validate and pass through -ffp-model option.
case options::OPT_ffp_model_EQ:
// This should only occur in the error case
// since the optID has been replaced by a more granular
// floating point option.
break;
// Validate and pass through -ffp-exception-behavior option.
case options::OPT_ffp_exception_behavior_EQ: {
StringRef Val = A->getValue();
if (!TrappingMathPresent && !FPExceptionBehavior.empty() &&
!FPExceptionBehavior.equals(Val))
// Warn that previous value of option is overridden.
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-exception-behavior=" + FPExceptionBehavior)
<< Args.MakeArgString("-ffp-exception-behavior=" + Val);
TrappingMath = TrappingMathPresent = false;
if (Val.equals("ignore") || Val.equals("maytrap"))
FPExceptionBehavior = Val;
else if (Val.equals("strict")) {
FPExceptionBehavior = Val;
TrappingMath = TrappingMathPresent = true;
} else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
case options::OPT_ffinite_math_only:
HonorINFs = false;
HonorNaNs = false;
break;
case options::OPT_fno_finite_math_only:
HonorINFs = true;
HonorNaNs = true;
break;
case options::OPT_funsafe_math_optimizations:
AssociativeMath = true;
ReciprocalMath = true;
SignedZeros = false;
TrappingMath = false;
FPExceptionBehavior = "";
break;
case options::OPT_fno_unsafe_math_optimizations:
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
TrappingMath = true;
FPExceptionBehavior = "strict";
// The target may have opted to flush by default, so force IEEE.
DenormalFPMath = llvm::DenormalMode::getIEEE();
DenormalFP32Math = llvm::DenormalMode::getIEEE();
break;
case options::OPT_Ofast:
// If -Ofast is the optimization level, then -ffast-math should be enabled
if (!OFastEnabled)
continue;
LLVM_FALLTHROUGH;
case options::OPT_ffast_math:
HonorINFs = false;
HonorNaNs = false;
MathErrno = false;
AssociativeMath = true;
ReciprocalMath = true;
SignedZeros = false;
TrappingMath = false;
RoundingFPMath = false;
// If fast-math is set, then set the fp-contract mode to fast.
FPContract = "fast";
break;
case options::OPT_fno_fast_math:
HonorINFs = true;
HonorNaNs = true;
// Turning on -ffast-math (with either flag) removes the need for
// MathErrno. However, turning *off* -ffast-math merely restores the
// toolchain default (which may be false).
MathErrno = TC.IsMathErrnoDefault();
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
TrappingMath = false;
RoundingFPMath = false;
// -fno-fast-math restores the default denormal and fp-contract handling.
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
FPContract = "";
break;
}
if (StrictFPModel) {
// If -ffp-model=strict has been specified on the command line but
// subsequent options conflict, then emit a warning diagnostic.
if (HonorINFs && HonorNaNs &&
!AssociativeMath && !ReciprocalMath &&
SignedZeros && TrappingMath && RoundingFPMath &&
(FPContract.equals("off") || FPContract.empty()) &&
DenormalFPMath == llvm::DenormalMode::getIEEE() &&
DenormalFP32Math == llvm::DenormalMode::getIEEE())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
StrictFPModel = false;
FPModel = "";
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=strict" <<
((A->getNumValues() == 0) ? A->getSpelling()
: Args.MakeArgString(A->getSpelling() + A->getValue()));
}
}
// If we handled this option, claim it.
A->claim();
}
if (!HonorINFs)
CmdArgs.push_back("-menable-no-infs");
if (!HonorNaNs)
CmdArgs.push_back("-menable-no-nans");
if (MathErrno)
CmdArgs.push_back("-fmath-errno");
if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
!TrappingMath)
CmdArgs.push_back("-menable-unsafe-fp-math");
if (!SignedZeros)
CmdArgs.push_back("-fno-signed-zeros");
if (AssociativeMath && !SignedZeros && !TrappingMath)
CmdArgs.push_back("-mreassociate");
if (ReciprocalMath)
CmdArgs.push_back("-freciprocal-math");
if (TrappingMath) {
// FP Exception Behavior is also set to strict
assert(FPExceptionBehavior.equals("strict"));
}
// The default is IEEE.
if (DenormalFPMath != llvm::DenormalMode::getIEEE()) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math=" << DenormalFPMath;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
// Add f32 specific denormal mode flag if it's different.
if (DenormalFP32Math != DenormalFPMath) {
llvm::SmallString<64> DenormFlag;
llvm::raw_svector_ostream ArgStr(DenormFlag);
ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
}
if (!FPContract.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract));
if (!RoundingFPMath)
CmdArgs.push_back(Args.MakeArgString("-fno-rounding-math"));
if (RoundingFPMath && RoundingMathPresent)
CmdArgs.push_back(Args.MakeArgString("-frounding-math"));
if (!FPExceptionBehavior.empty())
CmdArgs.push_back(Args.MakeArgString("-ffp-exception-behavior=" +
FPExceptionBehavior));
ParseMRecip(D, Args, CmdArgs);
// -ffast-math enables the __FAST_MATH__ preprocessor macro, but check for the
// individual features enabled by -ffast-math instead of the option itself as
// that's consistent with gcc's behaviour.
if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
CmdArgs.push_back("-ffast-math");
if (FPModel.equals("fast")) {
if (FPContract.equals("fast"))
// All set, do nothing.
;
else if (FPContract.empty())
// Enable -ffp-contract=fast
CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
else
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< "-ffp-model=fast"
<< Args.MakeArgString("-ffp-contract=" + FPContract);
}
}
// Handle __FINITE_MATH_ONLY__ similarly.
if (!HonorINFs && !HonorNaNs)
CmdArgs.push_back("-ffinite-math-only");
if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) {
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
}
// Disable a codegen optimization for floating-point casts.
if (Args.hasFlag(options::OPT_fno_strict_float_cast_overflow,
options::OPT_fstrict_float_cast_overflow, false))
CmdArgs.push_back("-fno-strict-float-cast-overflow");
}
static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input) {
// Enable region store model by default.
CmdArgs.push_back("-analyzer-store=region");
// Treat blocks as analysis entry points.
CmdArgs.push_back("-analyzer-opt-analyze-nested-blocks");
// Add default argument set.
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) {
CmdArgs.push_back("-analyzer-checker=core");
CmdArgs.push_back("-analyzer-checker=apiModeling");
if (!Triple.isWindowsMSVCEnvironment()) {
CmdArgs.push_back("-analyzer-checker=unix");
} else {
// Enable "unix" checkers that also work on Windows.
CmdArgs.push_back("-analyzer-checker=unix.API");
CmdArgs.push_back("-analyzer-checker=unix.Malloc");
CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof");
CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator");
CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg");
CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg");
}
// Disable some unix checkers for PS4.
if (Triple.isPS4CPU()) {
CmdArgs.push_back("-analyzer-disable-checker=unix.API");
CmdArgs.push_back("-analyzer-disable-checker=unix.Vfork");
}
if (Triple.isOSDarwin()) {
CmdArgs.push_back("-analyzer-checker=osx");
CmdArgs.push_back(
"-analyzer-checker=security.insecureAPI.decodeValueOfObjCType");
}
else if (Triple.isOSFuchsia())
CmdArgs.push_back("-analyzer-checker=fuchsia");
CmdArgs.push_back("-analyzer-checker=deadcode");
if (types::isCXX(Input.getType()))
CmdArgs.push_back("-analyzer-checker=cplusplus");
if (!Triple.isPS4CPU()) {
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.UncheckedReturn");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp");
CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork");
}
// Default nullability checks.
CmdArgs.push_back("-analyzer-checker=nullability.NullPassedToNonnull");
CmdArgs.push_back("-analyzer-checker=nullability.NullReturnedFromNonnull");
}
// Set the output format. The default is plist, for (lame) historical reasons.
CmdArgs.push_back("-analyzer-output");
if (Arg *A = Args.getLastArg(options::OPT__analyzer_output))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("plist");
// Disable the presentation of standard compiler warnings when using
// --analyze. We only want to show static analyzer diagnostics or frontend
// errors.
CmdArgs.push_back("-w");
// Add -Xanalyzer arguments when running as analyzer.
Args.AddAllArgValues(CmdArgs, options::OPT_Xanalyzer);
}
static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
const ArgList &Args, ArgStringList &CmdArgs,
bool KernelOrKext) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
// NVPTX doesn't support stack protectors; from the compiler's perspective, it
// doesn't even have a stack!
if (EffectiveTriple.isNVPTX())
return;
// -stack-protector=0 is default.
LangOptions::StackProtectorMode StackProtectorLevel = LangOptions::SSPOff;
LangOptions::StackProtectorMode DefaultStackProtectorLevel =
TC.GetDefaultStackProtectorLevel(KernelOrKext);
if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
options::OPT_fstack_protector_all,
options::OPT_fstack_protector_strong,
options::OPT_fstack_protector)) {
if (A->getOption().matches(options::OPT_fstack_protector))
StackProtectorLevel =
std::max<>(LangOptions::SSPOn, DefaultStackProtectorLevel);
else if (A->getOption().matches(options::OPT_fstack_protector_strong))
StackProtectorLevel = LangOptions::SSPStrong;
else if (A->getOption().matches(options::OPT_fstack_protector_all))
StackProtectorLevel = LangOptions::SSPReq;
} else {
StackProtectorLevel = DefaultStackProtectorLevel;
}
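// The cc1 "-stack-protector" option takes the numeric value of the chosen
// LangOptions::StackProtectorMode level rather than a flag name.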
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(StackProtectorLevel)));
}
// --param ssp-buffer-size=
for (const Arg *A : Args.filtered(options::OPT__param)) {
StringRef Str(A->getValue());
if (Str.startswith("ssp-buffer-size=")) {
if (StackProtectorLevel) {
CmdArgs.push_back("-stack-protector-buffer-size");
// FIXME: Verify the argument is a valid integer.
CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16)));
}
A->claim();
}
}
const std::string &TripleStr = EffectiveTriple.getTriple();
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if (EffectiveTriple.isX86() && Value != "tls" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "tls global";
return;
}
if (EffectiveTriple.isAArch64() && Value != "sysreg" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "sysreg global";
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_offset_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
int Offset;
if (Value.getAsInteger(10, Offset)) {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_reg_EQ)) {
StringRef Value = A->getValue();
if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
if (EffectiveTriple.isX86() && (Value != "fs" && Value != "gs")) {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "fs gs";
return;
}
if (EffectiveTriple.isAArch64() && Value != "sp_el0") {
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
A->render(Args, CmdArgs);
}
}
static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
if (!EffectiveTriple.isOSFreeBSD() && !EffectiveTriple.isOSLinux())
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
!EffectiveTriple.isPPC64())
return;
if (Args.hasFlag(options::OPT_fstack_clash_protection,
options::OPT_fno_stack_clash_protection, false))
CmdArgs.push_back("-fstack-clash-protection");
}
static void RenderTrivialAutoVarInitOptions(const Driver &D,
const ToolChain &TC,
const ArgList &Args,
ArgStringList &CmdArgs) {
auto DefaultTrivialAutoVarInit = TC.GetDefaultTrivialAutoVarInit();
StringRef TrivialAutoVarInit = "";
for (const Arg *A : Args) {
switch (A->getOption().getID()) {
default:
continue;
case options::OPT_ftrivial_auto_var_init: {
A->claim();
StringRef Val = A->getValue();
if (Val == "uninitialized" || Val == "zero" || Val == "pattern")
TrivialAutoVarInit = Val;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
break;
}
}
}
if (TrivialAutoVarInit.empty())
switch (DefaultTrivialAutoVarInit) {
case LangOptions::TrivialAutoVarInitKind::Uninitialized:
break;
case LangOptions::TrivialAutoVarInitKind::Pattern:
TrivialAutoVarInit = "pattern";
break;
case LangOptions::TrivialAutoVarInitKind::Zero:
TrivialAutoVarInit = "zero";
break;
}
if (!TrivialAutoVarInit.empty()) {
if (TrivialAutoVarInit == "zero" && !Args.hasArg(options::OPT_enable_trivial_var_init_zero))
D.Diag(diag::err_drv_trivial_auto_var_init_zero_disabled);
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init=" + TrivialAutoVarInit));
}
if (Arg *A =
Args.getLastArg(options::OPT_ftrivial_auto_var_init_stop_after)) {
if (!Args.hasArg(options::OPT_ftrivial_auto_var_init) ||
StringRef(
Args.getLastArg(options::OPT_ftrivial_auto_var_init)->getValue()) ==
"uninitialized")
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_missing_dependency);
A->claim();
StringRef Val = A->getValue();
if (std::stoi(Val.str()) <= 0)
D.Diag(diag::err_drv_trivial_auto_var_init_stop_after_invalid_value);
CmdArgs.push_back(
Args.MakeArgString("-ftrivial-auto-var-init-stop-after=" + Val));
}
}
static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs,
types::ID InputType) {
// cl-denorms-are-zero is not forwarded. It is translated into a generic flag
// for denormal flushing handling based on the target.
const unsigned ForwardedArguments[] = {
options::OPT_cl_opt_disable,
options::OPT_cl_strict_aliasing,
options::OPT_cl_single_precision_constant,
options::OPT_cl_finite_math_only,
options::OPT_cl_kernel_arg_info,
options::OPT_cl_unsafe_math_optimizations,
options::OPT_cl_fast_relaxed_math,
options::OPT_cl_mad_enable,
options::OPT_cl_no_signed_zeros,
options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
options::OPT_cl_uniform_work_group_size
};
if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
std::string CLStdStr = std::string("-cl-std=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(CLStdStr));
}
for (const auto &Arg : ForwardedArguments)
if (const auto *A = Args.getLastArg(Arg))
CmdArgs.push_back(Args.MakeArgString(A->getOption().getPrefixedName()));
// Only add the default headers if we are compiling OpenCL sources.
if ((types::isOpenCL(InputType) ||
(Args.hasArg(options::OPT_cl_std_EQ) && types::isSrcFile(InputType))) &&
!Args.hasArg(options::OPT_cl_no_stdinc)) {
CmdArgs.push_back("-finclude-default-header");
CmdArgs.push_back("-fdeclare-opencl-builtins");
}
}
static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool ARCMTEnabled = false;
if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) {
if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check,
options::OPT_ccc_arcmt_modify,
options::OPT_ccc_arcmt_migrate)) {
ARCMTEnabled = true;
switch (A->getOption().getID()) {
default: llvm_unreachable("missed a case");
case options::OPT_ccc_arcmt_check:
CmdArgs.push_back("-arcmt-action=check");
break;
case options::OPT_ccc_arcmt_modify:
CmdArgs.push_back("-arcmt-action=modify");
break;
case options::OPT_ccc_arcmt_migrate:
CmdArgs.push_back("-arcmt-action=migrate");
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output);
Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors);
break;
}
}
} else {
Args.ClaimAllArgs(options::OPT_ccc_arcmt_check);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify);
Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate);
}
if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) {
if (ARCMTEnabled)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-ccc-arcmt-migrate";
CmdArgs.push_back("-mt-migrate-directory");
CmdArgs.push_back(A->getValue());
if (!Args.hasArg(options::OPT_objcmt_migrate_literals,
options::OPT_objcmt_migrate_subscripting,
options::OPT_objcmt_migrate_property)) {
// None specified, means enable them all.
CmdArgs.push_back("-objcmt-migrate-literals");
CmdArgs.push_back("-objcmt-migrate-subscripting");
CmdArgs.push_back("-objcmt-migrate-property");
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path);
}
}
static void RenderBuiltinOptions(const ToolChain &TC, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs) {
// -fbuiltin is default unless -mkernel is used.
bool UseBuiltins =
Args.hasFlag(options::OPT_fbuiltin, options::OPT_fno_builtin,
!Args.hasArg(options::OPT_mkernel));
if (!UseBuiltins)
CmdArgs.push_back("-fno-builtin");
// -ffreestanding implies -fno-builtin.
if (Args.hasArg(options::OPT_ffreestanding))
UseBuiltins = false;
// Process the -fno-builtin-* options.
for (const auto &Arg : Args) {
const Option &O = Arg->getOption();
if (!O.matches(options::OPT_fno_builtin_))
continue;
Arg->claim();
// If -fno-builtin is specified, then there's no need to pass the option to
// the frontend.
if (!UseBuiltins)
continue;
StringRef FuncName = Arg->getValue();
CmdArgs.push_back(Args.MakeArgString("-fno-builtin-" + FuncName));
}
// le32-specific flags:
// -fno-math-builtin: clang should not convert math builtins to intrinsics
// by default.
if (TC.getArch() == llvm::Triple::le32)
CmdArgs.push_back("-fno-math-builtin");
}
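// Compute the default location for implicitly-built module files: the user's
// cache directory with "clang/ModuleCache" appended (platform dependent; on
// many Linux setups this resolves to something like ~/.cache/clang/ModuleCache).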
bool Driver::getDefaultModuleCachePath(SmallVectorImpl<char> &Result) {
if (llvm::sys::path::cache_directory(Result)) {
llvm::sys::path::append(Result, "clang");
llvm::sys::path::append(Result, "ModuleCache");
return true;
}
return false;
}
static void RenderModulesOptions(Compilation &C, const Driver &D,
const ArgList &Args, const InputInfo &Input,
const InputInfo &Output,
ArgStringList &CmdArgs, bool &HaveModules) {
// -fmodules enables the use of precompiled modules (off by default).
// Users can pass -fno-cxx-modules to turn off modules support for
// C++/Objective-C++ programs.
bool HaveClangModules = false;
if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
options::OPT_fno_cxx_modules, true);
if (AllowedInCXX || !types::isCXX(Input.getType())) {
CmdArgs.push_back("-fmodules");
HaveClangModules = true;
}
}
HaveModules |= HaveClangModules;
if (Args.hasArg(options::OPT_fmodules_ts)) {
CmdArgs.push_back("-fmodules-ts");
HaveModules = true;
}
// -fmodule-maps enables implicit reading of module map files. By default,
// this is enabled if we are using Clang's flavor of precompiled modules.
if (Args.hasFlag(options::OPT_fimplicit_module_maps,
options::OPT_fno_implicit_module_maps, HaveClangModules))
CmdArgs.push_back("-fimplicit-module-maps");
// -fmodules-decluse checks that modules used are declared so (off by default)
if (Args.hasFlag(options::OPT_fmodules_decluse,
options::OPT_fno_modules_decluse, false))
CmdArgs.push_back("-fmodules-decluse");
// -fmodules-strict-decluse is like -fmodules-decluse, but also checks that
// all #included headers are part of modules.
if (Args.hasFlag(options::OPT_fmodules_strict_decluse,
options::OPT_fno_modules_strict_decluse, false))
CmdArgs.push_back("-fmodules-strict-decluse");
// -fno-implicit-modules turns off implicitly compiling modules on demand.
bool ImplicitModules = false;
if (!Args.hasFlag(options::OPT_fimplicit_modules,
options::OPT_fno_implicit_modules, HaveClangModules)) {
if (HaveModules)
CmdArgs.push_back("-fno-implicit-modules");
} else if (HaveModules) {
ImplicitModules = true;
// -fmodule-cache-path specifies where our implicitly-built module files
// should be written.
SmallString<128> Path;
if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path))
Path = A->getValue();
bool HasPath = true;
if (C.isForDiagnostics()) {
// When generating crash reports, we want to emit the modules along with
// the reproduction sources, so we ignore any provided module path.
Path = Output.getFilename();
llvm::sys::path::replace_extension(Path, ".cache");
llvm::sys::path::append(Path, "modules");
} else if (Path.empty()) {
// No module path was provided: use the default.
HasPath = Driver::getDefaultModuleCachePath(Path);
}
// `HasPath` will only be false if getDefaultModuleCachePath() fails.
// That said, such a failure is unlikely, and not caching is harmless.
if (HasPath) {
const char Arg[] = "-fmodules-cache-path=";
Path.insert(Path.begin(), Arg, Arg + strlen(Arg));
CmdArgs.push_back(Args.MakeArgString(Path));
}
}
if (HaveModules) {
// -fprebuilt-module-path specifies where to load the prebuilt module files.
for (const Arg *A : Args.filtered(options::OPT_fprebuilt_module_path)) {
CmdArgs.push_back(Args.MakeArgString(
std::string("-fprebuilt-module-path=") + A->getValue()));
A->claim();
}
if (Args.hasFlag(options::OPT_fprebuilt_implicit_modules,
options::OPT_fno_prebuilt_implicit_modules, false))
CmdArgs.push_back("-fprebuilt-implicit-modules");
if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content,
options::OPT_fno_modules_validate_input_files_content,
false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
}
// -fmodule-name specifies the module that is currently being built (or
// used for header checking by -fmodule-maps).
Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ);
// -fmodule-map-file can be used to specify files containing module
// definitions.
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file);
// -fbuiltin-module-map can be used to load the clang
// builtin headers modulemap file.
if (Args.hasArg(options::OPT_fbuiltin_module_map)) {
SmallString<128> BuiltinModuleMap(D.ResourceDir);
llvm::sys::path::append(BuiltinModuleMap, "include");
llvm::sys::path::append(BuiltinModuleMap, "module.modulemap");
if (llvm::sys::fs::exists(BuiltinModuleMap))
CmdArgs.push_back(
Args.MakeArgString("-fmodule-map-file=" + BuiltinModuleMap));
}
// The -fmodule-file=<name>=<file> form specifies the mapping of module
// names to precompiled module files (the module is loaded only if used).
// The -fmodule-file=<file> form can be used to unconditionally load
// precompiled module files (whether used or not).
if (HaveModules)
Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file);
else
Args.ClaimAllArgs(options::OPT_fmodule_file);
// When building modules and generating crashdumps, we need to dump a module
// dependency VFS alongside the output.
if (HaveClangModules && C.isForDiagnostics()) {
SmallString<128> VFSDir(Output.getFilename());
llvm::sys::path::replace_extension(VFSDir, ".cache");
// Add the cache directory as a temp so the crash diagnostics pick it up.
C.addTempFile(Args.MakeArgString(VFSDir));
llvm::sys::path::append(VFSDir, "vfs");
CmdArgs.push_back("-module-dependency-dir");
CmdArgs.push_back(Args.MakeArgString(VFSDir));
}
if (HaveClangModules)
Args.AddLastArg(CmdArgs, options::OPT_fmodules_user_build_path);
// Pass through all -fmodules-ignore-macro arguments.
Args.AddAllArgs(CmdArgs, options::OPT_fmodules_ignore_macro);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_interval);
Args.AddLastArg(CmdArgs, options::OPT_fmodules_prune_after);
Args.AddLastArg(CmdArgs, options::OPT_fbuild_session_timestamp);
if (Arg *A = Args.getLastArg(options::OPT_fbuild_session_file)) {
if (Args.hasArg(options::OPT_fbuild_session_timestamp))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "-fbuild-session-timestamp";
llvm::sys::fs::file_status Status;
if (llvm::sys::fs::status(A->getValue(), Status))
D.Diag(diag::err_drv_no_such_file) << A->getValue();
CmdArgs.push_back(
Args.MakeArgString("-fbuild-session-timestamp=" +
Twine((uint64_t)Status.getLastModificationTime()
.time_since_epoch()
.count())));
}
if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) {
if (!Args.getLastArg(options::OPT_fbuild_session_timestamp,
options::OPT_fbuild_session_file))
D.Diag(diag::err_drv_modules_validate_once_requires_timestamp);
Args.AddLastArg(CmdArgs,
options::OPT_fmodules_validate_once_per_build_session);
}
if (Args.hasFlag(options::OPT_fmodules_validate_system_headers,
options::OPT_fno_modules_validate_system_headers,
ImplicitModules))
CmdArgs.push_back("-fmodules-validate-system-headers");
Args.AddLastArg(CmdArgs, options::OPT_fmodules_disable_diagnostic_validation);
}
static void RenderCharacterOptions(const ArgList &Args, const llvm::Triple &T,
ArgStringList &CmdArgs) {
// -fsigned-char is default.
if (const Arg *A = Args.getLastArg(options::OPT_fsigned_char,
options::OPT_fno_signed_char,
options::OPT_funsigned_char,
options::OPT_fno_unsigned_char)) {
if (A->getOption().matches(options::OPT_funsigned_char) ||
A->getOption().matches(options::OPT_fno_signed_char)) {
CmdArgs.push_back("-fno-signed-char");
}
} else if (!isSignedCharDefault(T)) {
CmdArgs.push_back("-fno-signed-char");
}
// The default depends on the language standard.
Args.AddLastArg(CmdArgs, options::OPT_fchar8__t, options::OPT_fno_char8__t);
if (const Arg *A = Args.getLastArg(options::OPT_fshort_wchar,
options::OPT_fno_short_wchar)) {
if (A->getOption().matches(options::OPT_fshort_wchar)) {
CmdArgs.push_back("-fwchar-type=short");
CmdArgs.push_back("-fno-signed-wchar");
} else {
bool IsARM = T.isARM() || T.isThumb() || T.isAArch64();
CmdArgs.push_back("-fwchar-type=int");
if (T.isOSzOS() ||
(IsARM && !(T.isOSWindows() || T.isOSNetBSD() || T.isOSOpenBSD())))
CmdArgs.push_back("-fno-signed-wchar");
else
CmdArgs.push_back("-fsigned-wchar");
}
}
}
static void RenderObjCOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
ObjCRuntime &Runtime, bool InferCovariantReturns,
const InputInfo &Input, ArgStringList &CmdArgs) {
const llvm::Triple::ArchType Arch = TC.getArch();
// -fobjc-dispatch-method is only relevant with the nonfragile ABI, and legacy
// is the default. Except for a deployment target of 10.5, the NeXT runtime is
// always legacy dispatch, and -fno-objc-legacy-dispatch is silently ignored.
if (Runtime.isNonFragile()) {
if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
options::OPT_fno_objc_legacy_dispatch,
Runtime.isLegacyDispatchDefaultForArch(Arch))) {
if (TC.UseObjCMixedDispatch())
CmdArgs.push_back("-fobjc-dispatch-method=mixed");
else
CmdArgs.push_back("-fobjc-dispatch-method=non-legacy");
}
}
// When the Objective-C legacy runtime is in effect on macOS, turn on the
// option to do Array/Dictionary subscripting by default.
if (Arch == llvm::Triple::x86 && T.isMacOSX() &&
Runtime.getKind() == ObjCRuntime::FragileMacOSX && Runtime.isNeXTFamily())
CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");
// Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
// NOTE: This logic is duplicated in ToolChains.cpp.
if (isObjCAutoRefCount(Args)) {
TC.CheckObjCARC();
CmdArgs.push_back("-fobjc-arc");
// FIXME: It seems like this entire block, and several around it should be
// wrapped in isObjC, but for now we just use it here as this is where it
// was being used previously.
if (types::isCXX(Input.getType()) && types::isObjC(Input.getType())) {
if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
CmdArgs.push_back("-fobjc-arc-cxxlib=libc++");
else
CmdArgs.push_back("-fobjc-arc-cxxlib=libstdc++");
}
// Allow the user to enable full exceptions code emission.
// We default off for Objective-C, on for Objective-C++.
if (Args.hasFlag(options::OPT_fobjc_arc_exceptions,
options::OPT_fno_objc_arc_exceptions,
/*Default=*/types::isCXX(Input.getType())))
CmdArgs.push_back("-fobjc-arc-exceptions");
}
// Silence warning for full exception code emission options when explicitly
// set to use no ARC.
if (Args.hasArg(options::OPT_fno_objc_arc)) {
Args.ClaimAllArgs(options::OPT_fobjc_arc_exceptions);
Args.ClaimAllArgs(options::OPT_fno_objc_arc_exceptions);
}
// Allow the user to control whether messages can be converted to runtime
// functions.
if (types::isObjC(Input.getType())) {
auto *Arg = Args.getLastArg(
options::OPT_fobjc_convert_messages_to_runtime_calls,
options::OPT_fno_objc_convert_messages_to_runtime_calls);
if (Arg &&
Arg->getOption().matches(
options::OPT_fno_objc_convert_messages_to_runtime_calls))
CmdArgs.push_back("-fno-objc-convert-messages-to-runtime-calls");
}
// -fobjc-infer-related-result-type is the default, except in the Objective-C
// rewriter.
if (InferCovariantReturns)
CmdArgs.push_back("-fno-objc-infer-related-result-type");
// Pass down -fobjc-weak or -fno-objc-weak if present.
if (types::isObjC(Input.getType())) {
auto WeakArg =
Args.getLastArg(options::OPT_fobjc_weak, options::OPT_fno_objc_weak);
if (!WeakArg) {
// nothing to do
} else if (!Runtime.allowsWeak()) {
if (WeakArg->getOption().matches(options::OPT_fobjc_weak))
D.Diag(diag::err_objc_weak_unsupported);
} else {
WeakArg->render(Args, CmdArgs);
}
}
if (Args.hasArg(options::OPT_fobjc_disable_direct_methods_for_testing))
CmdArgs.push_back("-fobjc-disable-direct-methods-for-testing");
}
static void RenderDiagnosticsOptions(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs) {
bool CaretDefault = true;
bool ColumnDefault = true;
if (const Arg *A = Args.getLastArg(options::OPT__SLASH_diagnostics_classic,
options::OPT__SLASH_diagnostics_column,
options::OPT__SLASH_diagnostics_caret)) {
switch (A->getOption().getID()) {
case options::OPT__SLASH_diagnostics_caret:
CaretDefault = true;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_column:
CaretDefault = false;
ColumnDefault = true;
break;
case options::OPT__SLASH_diagnostics_classic:
CaretDefault = false;
ColumnDefault = false;
break;
}
}
// -fcaret-diagnostics is default.
if (!Args.hasFlag(options::OPT_fcaret_diagnostics,
options::OPT_fno_caret_diagnostics, CaretDefault))
CmdArgs.push_back("-fno-caret-diagnostics");
// -fdiagnostics-fixit-info is default, only pass non-default.
if (!Args.hasFlag(options::OPT_fdiagnostics_fixit_info,
options::OPT_fno_diagnostics_fixit_info))
CmdArgs.push_back("-fno-diagnostics-fixit-info");
// Enable -fdiagnostics-show-option by default.
if (!Args.hasFlag(options::OPT_fdiagnostics_show_option,
options::OPT_fno_diagnostics_show_option, true))
CmdArgs.push_back("-fno-diagnostics-show-option");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
CmdArgs.push_back("-fdiagnostics-show-category");
CmdArgs.push_back(A->getValue());
}
if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness, false))
CmdArgs.push_back("-fdiagnostics-show-hotness");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) {
std::string Opt =
std::string("-fdiagnostics-hotness-threshold=") + A->getValue();
CmdArgs.push_back(Args.MakeArgString(Opt));
}
if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back(A->getValue());
}
if (const Arg *A = Args.getLastArg(
options::OPT_fdiagnostics_show_note_include_stack,
options::OPT_fno_diagnostics_show_note_include_stack)) {
const Option &O = A->getOption();
if (O.matches(options::OPT_fdiagnostics_show_note_include_stack))
CmdArgs.push_back("-fdiagnostics-show-note-include-stack");
else
CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
}
// Color diagnostics are parsed by the driver directly from argv and later
// re-parsed to construct this job; claim any possible color diagnostic here
// to avoid warn_drv_unused_argument and diagnose bad
// OPT_fdiagnostics_color_EQ values.
for (const Arg *A : Args) {
const Option &O = A->getOption();
if (!O.matches(options::OPT_fcolor_diagnostics) &&
!O.matches(options::OPT_fdiagnostics_color) &&
!O.matches(options::OPT_fno_color_diagnostics) &&
!O.matches(options::OPT_fno_diagnostics_color) &&
!O.matches(options::OPT_fdiagnostics_color_EQ))
continue;
if (O.matches(options::OPT_fdiagnostics_color_EQ)) {
StringRef Value(A->getValue());
if (Value != "always" && Value != "never" && Value != "auto")
D.Diag(diag::err_drv_clang_unsupported)
<< ("-fdiagnostics-color=" + Value).str();
}
A->claim();
}
if (D.getDiags().getDiagnosticOptions().ShowColors)
CmdArgs.push_back("-fcolor-diagnostics");
if (Args.hasArg(options::OPT_fansi_escape_codes))
CmdArgs.push_back("-fansi-escape-codes");
if (!Args.hasFlag(options::OPT_fshow_source_location,
options::OPT_fno_show_source_location))
CmdArgs.push_back("-fno-show-source-location");
if (Args.hasArg(options::OPT_fdiagnostics_absolute_paths))
CmdArgs.push_back("-fdiagnostics-absolute-paths");
if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column,
ColumnDefault))
CmdArgs.push_back("-fno-show-column");
if (!Args.hasFlag(options::OPT_fspell_checking,
options::OPT_fno_spell_checking))
CmdArgs.push_back("-fno-spell-checking");
}
enum class DwarfFissionKind { None, Split, Single };
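// Map the -gsplit-dwarf family of flags to a fission kind: no flag or
// -gno-split-dwarf -> None, -gsplit-dwarf or -gsplit-dwarf=split -> Split,
// -gsplit-dwarf=single -> Single; any other value is diagnosed.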
static DwarfFissionKind getDebugFissionKind(const Driver &D,
const ArgList &Args, Arg *&Arg) {
Arg = Args.getLastArg(options::OPT_gsplit_dwarf, options::OPT_gsplit_dwarf_EQ,
options::OPT_gno_split_dwarf);
if (!Arg || Arg->getOption().matches(options::OPT_gno_split_dwarf))
return DwarfFissionKind::None;
if (Arg->getOption().matches(options::OPT_gsplit_dwarf))
return DwarfFissionKind::Split;
StringRef Value = Arg->getValue();
if (Value == "split")
return DwarfFissionKind::Split;
if (Value == "single")
return DwarfFissionKind::Single;
D.Diag(diag::err_drv_unsupported_option_argument)
<< Arg->getOption().getName() << Arg->getValue();
return DwarfFissionKind::None;
}
static void renderDwarfFormat(const Driver &D, const llvm::Triple &T,
const ArgList &Args, ArgStringList &CmdArgs,
unsigned DwarfVersion) {
auto *DwarfFormatArg =
Args.getLastArg(options::OPT_gdwarf64, options::OPT_gdwarf32);
if (!DwarfFormatArg)
return;
if (DwarfFormatArg->getOption().matches(options::OPT_gdwarf64)) {
if (DwarfVersion < 3)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "DWARFv3 or greater";
else if (!T.isArch64Bit())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "64 bit architecture";
else if (!T.isOSBinFormatELF())
D.Diag(diag::err_drv_argument_only_allowed_with)
<< DwarfFormatArg->getAsString(Args) << "ELF platforms";
}
DwarfFormatArg->render(Args, CmdArgs);
}
static void renderDebugOptions(const ToolChain &TC, const Driver &D,
const llvm::Triple &T, const ArgList &Args,
bool EmitCodeView, bool IRInput,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind &DebugInfoKind,
DwarfFissionKind &DwarfFission) {
// These two forms of profiling info can't be used together.
if (const Arg *A1 = Args.getLastArg(options::OPT_fpseudo_probe_for_profiling))
if (const Arg *A2 = Args.getLastArg(options::OPT_fdebug_info_for_profiling))
D.Diag(diag::err_drv_argument_not_allowed_with)
<< A1->getAsString(Args) << A2->getAsString(Args);
if (Args.hasFlag(options::OPT_fdebug_info_for_profiling,
options::OPT_fno_debug_info_for_profiling, false) &&
checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_info_for_profiling), Args, D, TC))
CmdArgs.push_back("-fdebug-info-for-profiling");
// The 'g' group of options involves a somewhat intricate sequence of decisions
// about what to pass from the driver to the frontend, but by the time they
// reach cc1 they've been factored into three well-defined orthogonal choices:
// * what level of debug info to generate
// * what dwarf version to write
// * what debugger tuning to use
// This avoids having to monkey around further in cc1 other than to disable
// codeview if not running in a Windows environment. Perhaps even that
// decision should be made in the driver as well though.
llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
bool SplitDWARFInlining =
Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
options::OPT_fno_split_dwarf_inlining, false);
// Normally -gsplit-dwarf is only useful with -gN. For IR input, Clang does
// object file generation and no IR generation, so -gN should not be needed.
// Thus, allow -gsplit-dwarf with either -gN or IR input.
if (IRInput || Args.hasArg(options::OPT_g_Group)) {
Arg *SplitDWARFArg;
DwarfFission = getDebugFissionKind(D, Args, SplitDWARFArg);
if (DwarfFission != DwarfFissionKind::None &&
!checkDebugInfoOption(SplitDWARFArg, Args, D, TC)) {
DwarfFission = DwarfFissionKind::None;
SplitDWARFInlining = false;
}
}
if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
// If the last option explicitly specified a debug-info level, use it.
if (checkDebugInfoOption(A, Args, D, TC) &&
A->getOption().matches(options::OPT_gN_Group)) {
DebugInfoKind = DebugLevelToInfoKind(*A);
// For -g0 or -gline-tables-only, drop -gsplit-dwarf. This gets a bit more
// complicated if you've disabled inline info in the skeleton CUs
// (SplitDWARFInlining) - then there's value in composing split-dwarf and
// line-tables-only, so let those compose naturally in that case.
if (DebugInfoKind == codegenoptions::NoDebugInfo ||
DebugInfoKind == codegenoptions::DebugDirectivesOnly ||
(DebugInfoKind == codegenoptions::DebugLineTablesOnly &&
SplitDWARFInlining))
DwarfFission = DwarfFissionKind::None;
}
}
// If a debugger tuning argument appeared, remember it.
if (const Arg *A =
Args.getLastArg(options::OPT_gTune_Group, options::OPT_ggdbN_Group)) {
if (checkDebugInfoOption(A, Args, D, TC)) {
if (A->getOption().matches(options::OPT_glldb))
DebuggerTuning = llvm::DebuggerKind::LLDB;
else if (A->getOption().matches(options::OPT_gsce))
DebuggerTuning = llvm::DebuggerKind::SCE;
else if (A->getOption().matches(options::OPT_gdbx))
DebuggerTuning = llvm::DebuggerKind::DBX;
else
DebuggerTuning = llvm::DebuggerKind::GDB;
}
}
// If a -gdwarf argument appeared, remember it.
const Arg *GDwarfN = getDwarfNArg(Args);
bool EmitDwarf = false;
if (GDwarfN) {
if (checkDebugInfoOption(GDwarfN, Args, D, TC))
EmitDwarf = true;
else
GDwarfN = nullptr;
}
if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) {
if (checkDebugInfoOption(A, Args, D, TC))
EmitCodeView = true;
}
// If the user asked for debug info but did not explicitly specify -gcodeview
// or -gdwarf, ask the toolchain for the default format.
if (!EmitCodeView && !EmitDwarf &&
DebugInfoKind != codegenoptions::NoDebugInfo) {
switch (TC.getDefaultDebugFormat()) {
case codegenoptions::DIF_CodeView:
EmitCodeView = true;
break;
case codegenoptions::DIF_DWARF:
EmitDwarf = true;
break;
}
}
unsigned RequestedDWARFVersion = 0; // DWARF version requested by the user
unsigned EffectiveDWARFVersion = 0; // DWARF version TC can generate. It may
// be lower than what the user wanted.
unsigned DefaultDWARFVersion = ParseDebugDefaultVersion(TC, Args);
if (EmitDwarf) {
// Start with the platform default DWARF version
RequestedDWARFVersion = TC.GetDefaultDwarfVersion();
assert(RequestedDWARFVersion &&
"toolchain default DWARF version must be nonzero");
// If the user specified a default DWARF version, that takes precedence
// over the platform default.
if (DefaultDWARFVersion)
RequestedDWARFVersion = DefaultDWARFVersion;
// Override with a user-specified DWARF version
if (GDwarfN)
if (auto ExplicitVersion = DwarfVersionNum(GDwarfN->getSpelling()))
RequestedDWARFVersion = ExplicitVersion;
// Clamp effective DWARF version to the max supported by the toolchain.
EffectiveDWARFVersion =
std::min(RequestedDWARFVersion, TC.getMaxDwarfVersion());
}
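// For example, with a toolchain default of DWARF v4, an explicit -gdwarf-5
// makes RequestedDWARFVersion 5, but EffectiveDWARFVersion stays at 4 if the
// toolchain's maximum supported version is 4.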
// -gline-directives-only is supported only for DWARF debug info.
if (RequestedDWARFVersion == 0 &&
DebugInfoKind == codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::NoDebugInfo;
// Strict DWARF is false by default, but for DBX we need it to default to
// true.
if (const Arg *A = Args.getLastArg(options::OPT_gstrict_dwarf))
(void)checkDebugInfoOption(A, Args, D, TC);
if (Args.hasFlag(options::OPT_gstrict_dwarf, options::OPT_gno_strict_dwarf,
DebuggerTuning == llvm::DebuggerKind::DBX))
CmdArgs.push_back("-gstrict-dwarf");
// And we handle flag -grecord-gcc-switches later with DWARFDebugFlags.
Args.ClaimAllArgs(options::OPT_g_flags_Group);
// Column info is included by default for everything except SCE and
// CodeView. Clang doesn't track end columns, just starting columns, which,
// in theory, is fine for CodeView (and PDB). In practice, however, the
// Microsoft debuggers don't handle missing end columns well, and the AIX
// debugger DBX also doesn't handle the columns well, so it's better not to
// include any column info.
if (const Arg *A = Args.getLastArg(options::OPT_gcolumn_info))
(void)checkDebugInfoOption(A, Args, D, TC);
if (!Args.hasFlag(options::OPT_gcolumn_info, options::OPT_gno_column_info,
!EmitCodeView &&
(DebuggerTuning != llvm::DebuggerKind::SCE &&
DebuggerTuning != llvm::DebuggerKind::DBX)))
CmdArgs.push_back("-gno-column-info");
// FIXME: Move backend command line options to the module.
// If -gline-tables-only or -gline-directives-only is the last option, it wins.
if (const Arg *A = Args.getLastArg(options::OPT_gmodules))
if (checkDebugInfoOption(A, Args, D, TC)) {
if (DebugInfoKind != codegenoptions::DebugLineTablesOnly &&
DebugInfoKind != codegenoptions::DebugDirectivesOnly) {
DebugInfoKind = codegenoptions::DebugInfoConstructor;
CmdArgs.push_back("-dwarf-ext-refs");
CmdArgs.push_back("-fmodule-format=obj");
}
}
if (T.isOSBinFormatELF() && SplitDWARFInlining)
CmdArgs.push_back("-fsplit-dwarf-inlining");
// After we've dealt with all combinations of things that could
// make DebugInfoKind be other than None or DebugLineTablesOnly,
// figure out if we need to "upgrade" it to standalone debug info.
// We parse these two '-f' options whether or not they will be used,
// to claim them even if you wrote "-fstandalone-debug -gline-tables-only"
bool NeedFullDebug = Args.hasFlag(
options::OPT_fstandalone_debug, options::OPT_fno_standalone_debug,
DebuggerTuning == llvm::DebuggerKind::LLDB ||
TC.GetDefaultStandaloneDebug());
if (const Arg *A = Args.getLastArg(options::OPT_fstandalone_debug))
(void)checkDebugInfoOption(A, Args, D, TC);
if (DebugInfoKind == codegenoptions::LimitedDebugInfo ||
DebugInfoKind == codegenoptions::DebugInfoConstructor) {
if (Args.hasFlag(options::OPT_fno_eliminate_unused_debug_types,
options::OPT_feliminate_unused_debug_types, false))
DebugInfoKind = codegenoptions::UnusedTypeInfo;
else if (NeedFullDebug)
DebugInfoKind = codegenoptions::FullDebugInfo;
}
if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source,
false)) {
// Source embedding is a vendor extension to DWARF v5. By now we have
// checked if a DWARF version was stated explicitly, and have otherwise
// fallen back to the target default, so if this is still not at least 5
// we emit an error.
const Arg *A = Args.getLastArg(options::OPT_gembed_source);
if (RequestedDWARFVersion < 5)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< A->getAsString(Args) << "-gdwarf-5";
else if (EffectiveDWARFVersion < 5)
// The toolchain has a reduced maximum allowed DWARF version, so we can't
// enable -gembed-source.
D.Diag(diag::warn_drv_dwarf_version_limited_by_target)
<< A->getAsString(Args) << TC.getTripleString() << 5
<< EffectiveDWARFVersion;
else if (checkDebugInfoOption(A, Args, D, TC))
CmdArgs.push_back("-gembed-source");
}
if (EmitCodeView) {
CmdArgs.push_back("-gcodeview");
// Emit codeview type hashes if requested.
if (Args.hasFlag(options::OPT_gcodeview_ghash,
options::OPT_gno_codeview_ghash, false)) {
CmdArgs.push_back("-gcodeview-ghash");
}
}
// Omit inline line tables if requested.
if (Args.hasFlag(options::OPT_gno_inline_line_tables,
options::OPT_ginline_line_tables, false)) {
CmdArgs.push_back("-gno-inline-line-tables");
}
// When emitting remarks, we need at least debug lines in the output.
if (willEmitRemarks(Args) &&
DebugInfoKind <= codegenoptions::DebugDirectivesOnly)
DebugInfoKind = codegenoptions::DebugLineTablesOnly;
// Adjust the debug info kind for the given toolchain.
TC.adjustDebugInfoKind(DebugInfoKind, Args);
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, EffectiveDWARFVersion,
DebuggerTuning);
// -fdebug-macro turns on macro debug info generation.
if (Args.hasFlag(options::OPT_fdebug_macro, options::OPT_fno_debug_macro,
false))
if (checkDebugInfoOption(Args.getLastArg(options::OPT_fdebug_macro), Args,
D, TC))
CmdArgs.push_back("-debug-info-macro");
// -ggnu-pubnames turns on gnu style pubnames in the backend.
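// With split DWARF (or an explicit pubnames request) pubnames are emitted:
// -gpubnames selects the standard form, otherwise the GNU form is used, and
// the -gno-gnu-pubnames / -gno-pubnames variants suppress both.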
const auto *PubnamesArg =
Args.getLastArg(options::OPT_ggnu_pubnames, options::OPT_gno_gnu_pubnames,
options::OPT_gpubnames, options::OPT_gno_pubnames);
if (DwarfFission != DwarfFissionKind::None ||
(PubnamesArg && checkDebugInfoOption(PubnamesArg, Args, D, TC)))
if (!PubnamesArg ||
(!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
!PubnamesArg->getOption().matches(options::OPT_gno_pubnames)))
CmdArgs.push_back(PubnamesArg && PubnamesArg->getOption().matches(
options::OPT_gpubnames)
? "-gpubnames"
: "-ggnu-pubnames");
if (Args.hasFlag(options::OPT_fdebug_ranges_base_address,
options::OPT_fno_debug_ranges_base_address, false)) {
CmdArgs.push_back("-fdebug-ranges-base-address");
}
// -gdwarf-aranges turns on the emission of the aranges section in the
// backend.
// Always enabled for SCE tuning.
bool NeedAranges = DebuggerTuning == llvm::DebuggerKind::SCE;
if (const Arg *A = Args.getLastArg(options::OPT_gdwarf_aranges))
NeedAranges = checkDebugInfoOption(A, Args, D, TC) || NeedAranges;
if (NeedAranges) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-arange-section");
}
if (Args.hasFlag(options::OPT_fforce_dwarf_frame,
options::OPT_fno_force_dwarf_frame, false))
CmdArgs.push_back("-fforce-dwarf-frame");
if (Args.hasFlag(options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section, false)) {
if (!(T.isOSBinFormatELF() || T.isOSBinFormatWasm())) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_fdebug_types_section)
->getAsString(Args)
<< T.getTriple();
} else if (checkDebugInfoOption(
Args.getLastArg(options::OPT_fdebug_types_section), Args, D,
TC)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-generate-type-units");
}
}
// To avoid join/split of directory+filename, the integrated assembler prefers
// the directory form of .file on all DWARF versions. GNU as doesn't allow the
// form before DWARF v5.
if (!Args.hasFlag(options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
TC.useIntegratedAs() || EffectiveDWARFVersion >= 5))
CmdArgs.push_back("-fno-dwarf-directory-asm");
// Decide how to render forward declarations of template instantiations.
// SCE wants full descriptions, others just get them in the name.
if (DebuggerTuning == llvm::DebuggerKind::SCE)
CmdArgs.push_back("-debug-forward-template-params");
// Do we need to explicitly import anonymous namespaces into the parent
// scope?
if (DebuggerTuning == llvm::DebuggerKind::SCE)
CmdArgs.push_back("-dwarf-explicit-import");
renderDwarfFormat(D, T, Args, CmdArgs, EffectiveDWARFVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, TC);
}
void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args, const char *LinkingOutput) const {
const auto &TC = getToolChain();
const llvm::Triple &RawTriple = TC.getTriple();
const llvm::Triple &Triple = TC.getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
const Driver &D = TC.getDriver();
ArgStringList CmdArgs;
// Check number of inputs for sanity. We need at least one input.
assert(Inputs.size() >= 1 && "Must have at least one input.");
// CUDA/HIP compilation may have multiple inputs (source file + results of
// device-side compilations). OpenMP device jobs also take the host IR as a
// second input. Module precompilation accepts a list of header files to
// include as part of the module. All other jobs are expected to have exactly
// one input.
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA);
bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_Host));
bool IsUsingLTO = D.isUsingLTO(IsDeviceOffloadAction);
auto LTOMode = D.getLTOMode(IsDeviceOffloadAction);
// A header module compilation doesn't have a main input file, so invent a
// fake one as a placeholder.
const char *ModuleName = [&]{
auto *ModuleNameArg = Args.getLastArg(options::OPT_fmodule_name_EQ);
return ModuleNameArg ? ModuleNameArg->getValue() : "";
}();
InputInfo HeaderModuleInput(Inputs[0].getType(), ModuleName, ModuleName);
const InputInfo &Input =
IsHeaderModulePrecompile ? HeaderModuleInput : Inputs[0];
InputInfoList ModuleHeaderInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
for (const InputInfo &I : Inputs) {
if (&I == &Input) {
// This is the primary input.
} else if (IsHeaderModulePrecompile &&
types::getPrecompiledType(I.getType()) == types::TY_PCH) {
types::ID Expected = HeaderModuleInput.getType();
if (I.getType() != Expected) {
D.Diag(diag::err_drv_module_header_wrong_kind)
<< I.getFilename() << types::getTypeName(I.getType())
<< types::getTypeName(Expected);
}
ModuleHeaderInputs.push_back(I);
} else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
OpenMPDeviceInput = &I;
} else {
llvm_unreachable("unexpectedly given multiple inputs");
}
}
const llvm::Triple *AuxTriple =
(IsCuda || IsHIP) ? TC.getAuxTriple() : nullptr;
bool IsWindowsMSVC = RawTriple.isWindowsMSVCEnvironment();
bool IsIAMCU = RawTriple.isOSIAMCU();
// Adjust IsWindowsXYZ for CUDA/HIP compilations. Even when compiling in
// device mode (i.e., getToolchain().getTriple() is NVPTX/AMDGCN, not
// Windows), we need to pass Windows-specific flags to cc1.
if (IsCuda || IsHIP)
IsWindowsMSVC |= AuxTriple && AuxTriple->isWindowsMSVCEnvironment();
// C++ is not supported for IAMCU.
if (IsIAMCU && types::isCXX(Input.getType()))
D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";
// Invoke ourselves in -cc1 mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
if (const Arg *MJ = Args.getLastArg(options::OPT_MJ)) {
DumpCompilationDatabase(C, MJ->getValue(), TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_MJ);
} else if (const Arg *GenCDBFragment =
Args.getLastArg(options::OPT_gen_cdb_fragment_path)) {
DumpCompilationDatabaseFragmentToDir(GenCDBFragment->getValue(), C,
TripleStr, Output, Input, Args);
Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
}
if (IsCuda || IsHIP) {
// We have to pass the triple of the host if compiling for a CUDA/HIP device
// and vice-versa.
std::string NormalizedTriple;
if (JA.isDeviceOffloading(Action::OFK_Cuda) ||
JA.isDeviceOffloading(Action::OFK_HIP))
NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
else {
// Host-side compilation.
NormalizedTriple =
(IsCuda ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
: C.getSingleOffloadToolChain<Action::OFK_HIP>())
->getTriple()
.normalize();
if (IsCuda) {
// We need to figure out which CUDA version we're compiling for, as that
// determines how we load and launch GPU kernels.
auto *CTC = static_cast<const toolchains::CudaToolChain *>(
C.getSingleOffloadToolChain<Action::OFK_Cuda>());
assert(CTC && "Expected valid CUDA Toolchain.");
if (CTC && CTC->CudaInstallation.version() != CudaVersion::UNKNOWN)
CmdArgs.push_back(Args.MakeArgString(
Twine("-target-sdk-version=") +
CudaVersionToString(CTC->CudaInstallation.version())));
}
}
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
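// Illustrative note (editorial): for a CUDA/HIP device-side job on an
// x86_64-unknown-linux-gnu host (an assumed example triple), the block above
// forwards the *host* triple, so cc1 receives
// "-aux-triple x86_64-unknown-linux-gnu"; host-side jobs receive the device
// toolchain's triple instead.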
if (Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) {
CmdArgs.push_back("-fsycl-is-device");
if (Arg *A = Args.getLastArg(options::OPT_sycl_std_EQ)) {
A->render(Args, CmdArgs);
} else {
// Ensure the default version in SYCL mode is 2020.
CmdArgs.push_back("-sycl-std=2020");
}
}
if (IsOpenMPDevice) {
// We have to pass the triple of the host if compiling for an OpenMP device.
std::string NormalizedTriple =
C.getSingleOffloadToolChain<Action::OFK_Host>()
->getTriple()
.normalize();
CmdArgs.push_back("-aux-triple");
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
unsigned Version = 0;
bool Failure =
Triple.getArchName().substr(Offset).consumeInteger(10, Version);
if (Failure || Version < 7)
D.Diag(diag::err_target_unsupported_arch) << Triple.getArchName()
<< TripleStr;
}
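// Illustrative note (editorial): for an arch name such as "armv7" (an assumed
// example value) the check above skips the 4-character "armv" prefix (6 for
// "thumbv") and parses the remaining digits; anything below v7, or an
// unparsable suffix, is rejected with err_target_unsupported_arch.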
// Push all default warning arguments that are specific to
// the given target. These are added before any user-provided warning
// options.
TC.addClangWarningOptions(CmdArgs);
// FIXME: Subclass ToolChain for SPIR and move this to addClangWarningOptions.
if (Triple.isSPIR())
CmdArgs.push_back("-Wspir-compat");
// Select the appropriate action.
RewriteKind rewriteKind = RK_None;
// If CollectArgsForIntegratedAssembler() isn't called below, claim the args
// it claims when not running an assembler. Otherwise, clang would emit
// "argument unused" warnings for assembler flags when e.g. adding "-E" to
// flags while debugging something. That'd be somewhat inconvenient, and it's
// also inconsistent with most other flags -- we don't warn on
// -ffunction-sections not being used in -E mode either for example, even
// though it's not really used either.
if (!isa<AssembleJobAction>(JA)) {
// The args claimed here should match the args used in
// CollectArgsForIntegratedAssembler().
if (TC.useIntegratedAs()) {
Args.ClaimAllArgs(options::OPT_mrelax_all);
Args.ClaimAllArgs(options::OPT_mno_relax_all);
Args.ClaimAllArgs(options::OPT_mincremental_linker_compatible);
Args.ClaimAllArgs(options::OPT_mno_incremental_linker_compatible);
switch (C.getDefaultToolChain().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
Args.ClaimAllArgs(options::OPT_mimplicit_it_EQ);
break;
default:
break;
}
}
Args.ClaimAllArgs(options::OPT_Wa_COMMA);
Args.ClaimAllArgs(options::OPT_Xassembler);
}
if (isa<AnalyzeJobAction>(JA)) {
assert(JA.getType() == types::TY_Plist && "Invalid output type.");
CmdArgs.push_back("-analyze");
} else if (isa<MigrateJobAction>(JA)) {
CmdArgs.push_back("-migrate");
} else if (isa<PreprocessJobAction>(JA)) {
if (Output.getType() == types::TY_Dependencies)
CmdArgs.push_back("-Eonly");
else {
CmdArgs.push_back("-E");
if (Args.hasArg(options::OPT_rewrite_objc) &&
!Args.hasArg(options::OPT_g_Group))
CmdArgs.push_back("-P");
}
} else if (isa<AssembleJobAction>(JA)) {
CmdArgs.push_back("-emit-obj");
CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);
// Also ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
} else if (isa<PrecompileJobAction>(JA)) {
if (JA.getType() == types::TY_Nothing)
CmdArgs.push_back("-fsyntax-only");
else if (JA.getType() == types::TY_ModuleFile)
CmdArgs.push_back(IsHeaderModulePrecompile
? "-emit-header-module"
: "-emit-module-interface");
else
CmdArgs.push_back("-emit-pch");
} else if (isa<VerifyPCHJobAction>(JA)) {
CmdArgs.push_back("-verify-pch");
} else {
assert((isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) &&
"Invalid action for clang tool.");
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
} else if (JA.getType() == types::TY_LLVM_IR ||
JA.getType() == types::TY_LTO_IR) {
CmdArgs.push_back("-emit-llvm");
} else if (JA.getType() == types::TY_LLVM_BC ||
JA.getType() == types::TY_LTO_BC) {
// Emit textual llvm IR for AMDGPU offloading for -emit-llvm -S
if (Triple.isAMDGCN() && IsOpenMPDevice && Args.hasArg(options::OPT_S) &&
Args.hasArg(options::OPT_emit_llvm)) {
CmdArgs.push_back("-emit-llvm");
} else {
CmdArgs.push_back("-emit-llvm-bc");
}
} else if (JA.getType() == types::TY_IFS ||
JA.getType() == types::TY_IFS_CPP) {
StringRef ArgStr =
Args.hasArg(options::OPT_interface_stub_version_EQ)
? Args.getLastArgValue(options::OPT_interface_stub_version_EQ)
: "ifs-v1";
CmdArgs.push_back("-emit-interface-stubs");
CmdArgs.push_back(
Args.MakeArgString(Twine("-interface-stub-version=") + ArgStr.str()));
} else if (JA.getType() == types::TY_PP_Asm) {
CmdArgs.push_back("-S");
} else if (JA.getType() == types::TY_AST) {
CmdArgs.push_back("-emit-pch");
} else if (JA.getType() == types::TY_ModuleFile) {
CmdArgs.push_back("-module-file-info");
} else if (JA.getType() == types::TY_RewrittenObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_NonFragile;
} else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
CmdArgs.push_back("-rewrite-objc");
rewriteKind = RK_Fragile;
} else {
assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
}
// Preserve use-list order by default when emitting bitcode, so that
// loading the bitcode up in 'opt' or 'llc' and running passes gives the
// same result as running passes here. For LTO, we don't need to preserve
// the use-list order, since serialization to bitcode is part of the flow.
if (JA.getType() == types::TY_LLVM_BC)
CmdArgs.push_back("-emit-llvm-uselists");
if (IsUsingLTO) {
if (!IsDeviceOffloadAction) {
if (Args.hasArg(options::OPT_flto))
CmdArgs.push_back("-flto");
else {
if (D.getLTOMode() == LTOK_Thin)
CmdArgs.push_back("-flto=thin");
else
CmdArgs.push_back("-flto=full");
}
CmdArgs.push_back("-flto-unit");
} else if (Triple.isAMDGPU()) {
// Only AMDGPU supports device-side LTO
assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
CmdArgs.push_back(Args.MakeArgString(
Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full")));
CmdArgs.push_back("-flto-unit");
} else {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
options::OPT_foffload_lto_EQ)
->getAsString(Args)
<< Triple.getTriple();
}
}
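// Illustrative note (editorial): when LTO is enabled on the host side, a bare
// -flto forwards "-flto"; otherwise the driver's LTO mode selects
// "-flto=thin" or "-flto=full", and "-flto-unit" is appended in either case.
// For device offloading, only AMDGPU accepts these flags; other targets get
// err_drv_unsupported_opt_for_target.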
}
if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) {
if (!types::isLLVMIR(Input.getType()))
D.Diag(diag::err_drv_arg_requires_bitcode_input) << A->getAsString(Args);
Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
}
if (Args.getLastArg(options::OPT_fthin_link_bitcode_EQ))
Args.AddLastArg(CmdArgs, options::OPT_fthin_link_bitcode_EQ);
if (Args.getLastArg(options::OPT_save_temps_EQ))
Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
options::OPT_fmemory_profile_EQ,
options::OPT_fno_memory_profile);
if (MemProfArg &&
!MemProfArg->getOption().matches(options::OPT_fno_memory_profile))
MemProfArg->render(Args, CmdArgs);
// Embed-bitcode option.
// Only white-listed flags below are allowed to be embedded.
if (C.getDriver().embedBitcodeInObject() && !IsUsingLTO &&
(isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
// Add flags implied by -fembed-bitcode.
Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
// Disable all llvm IR level optimizations.
CmdArgs.push_back("-disable-llvm-passes");
// Render target options.
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// Reject options that shouldn't be supported in bitcode;
// also reject kernel/kext.
static const constexpr unsigned kBitcodeOptionBlacklist[] = {
options::OPT_mkernel,
options::OPT_fapple_kext,
options::OPT_ffunction_sections,
options::OPT_fno_function_sections,
options::OPT_fdata_sections,
options::OPT_fno_data_sections,
options::OPT_fbasic_block_sections_EQ,
options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names,
options::OPT_funique_section_names,
options::OPT_fno_unique_section_names,
options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names,
options::OPT_mrestrict_it,
options::OPT_mno_restrict_it,
options::OPT_mstackrealign,
options::OPT_mno_stackrealign,
options::OPT_mstack_alignment,
options::OPT_mcmodel_EQ,
options::OPT_mlong_calls,
options::OPT_mno_long_calls,
options::OPT_ggnu_pubnames,
options::OPT_gdwarf_aranges,
options::OPT_fdebug_types_section,
options::OPT_fno_debug_types_section,
options::OPT_fdwarf_directory_asm,
options::OPT_fno_dwarf_directory_asm,
options::OPT_mrelax_all,
options::OPT_mno_relax_all,
options::OPT_ftrap_function_EQ,
options::OPT_ffixed_r9,
options::OPT_mfix_cortex_a53_835769,
options::OPT_mno_fix_cortex_a53_835769,
options::OPT_ffixed_x18,
options::OPT_mglobal_merge,
options::OPT_mno_global_merge,
options::OPT_mred_zone,
options::OPT_mno_red_zone,
options::OPT_Wa_COMMA,
options::OPT_Xassembler,
options::OPT_mllvm,
};
for (const auto &A : Args)
if (llvm::find(kBitcodeOptionBlacklist, A->getOption().getID()) !=
std::end(kBitcodeOptionBlacklist))
D.Diag(diag::err_drv_unsupported_embed_bitcode) << A->getSpelling();
// Render the CodeGen options that need to be passed.
if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");
RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
CmdArgs, JA);
// Render ABI arguments
switch (TC.getArch()) {
default: break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
RenderARMABI(Triple, Args, CmdArgs);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
RenderAArch64ABI(Triple, Args, CmdArgs);
break;
}
// Optimization level for CodeGen.
if (const Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Input/Output file.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Input output.");
}
for (const auto &II : Inputs) {
addDashXForInput(Args, II, CmdArgs);
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else
II.getInputArg().renderAsInput(Args, CmdArgs);
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::AtFileUTF8(), D.getClangProgramPath(),
CmdArgs, Inputs, Output));
return;
}
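// Illustrative note (editorial): when -fembed-bitcode is active (and LTO is
// not), the job built above is intentionally minimal: options on the list
// above are rejected with err_drv_unsupported_embed_bitcode, IR-level
// optimizations are disabled via "-disable-llvm-passes", and ConstructJob
// returns early after emitting only target, ABI, optimization-level, and
// input/output arguments.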
if (C.getDriver().embedBitcodeMarkerOnly() && !IsUsingLTO)
CmdArgs.push_back("-fembed-bitcode=marker");
// We normally speed up the clang process a bit by skipping destructors at
// exit, but when we're generating diagnostics we can rely on some of the
// cleanup.
if (!C.isForDiagnostics())
CmdArgs.push_back("-disable-free");
#ifdef NDEBUG
const bool IsAssertBuild = false;
#else
const bool IsAssertBuild = true;
#endif
// Disable the verification pass in -asserts builds.
if (!IsAssertBuild)
CmdArgs.push_back("-disable-llvm-verifier");
// Discard value names in assert builds unless otherwise specified.
if (Args.hasFlag(options::OPT_fdiscard_value_names,
options::OPT_fno_discard_value_names, !IsAssertBuild)) {
if (Args.hasArg(options::OPT_fdiscard_value_names) &&
(std::any_of(Inputs.begin(), Inputs.end(),
[](const clang::driver::InputInfo &II) {
return types::isLLVMIR(II.getType());
}))) {
D.Diag(diag::warn_ignoring_fdiscard_for_bitcode);
}
CmdArgs.push_back("-discard-value-names");
}
// Set the main file name, so that debug info works even with
// -save-temps.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(getBaseInputName(Args, Input));
// Some flags which affect the language (via preprocessor
// defines).
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("-static-define");
if (Args.hasArg(options::OPT_municode))
CmdArgs.push_back("-DUNICODE");
if (isa<AnalyzeJobAction>(JA))
RenderAnalyzerOptions(Args, CmdArgs, Triple, Input);
if (isa<AnalyzeJobAction>(JA) ||
(isa<PreprocessJobAction>(JA) && Args.hasArg(options::OPT__analyze)))
CmdArgs.push_back("-setup-static-analyzer");
// Enable compatibility mode to avoid analyzer-config-related errors.
// Since we can't access frontend flags through hasArg, let's manually iterate
// through them.
bool FoundAnalyzerConfig = false;
for (auto Arg : Args.filtered(options::OPT_Xclang))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (!FoundAnalyzerConfig)
for (auto Arg : Args.filtered(options::OPT_Xanalyzer))
if (StringRef(Arg->getValue()) == "-analyzer-config") {
FoundAnalyzerConfig = true;
break;
}
if (FoundAnalyzerConfig)
CmdArgs.push_back("-analyzer-config-compatibility-mode=true");
CheckCodeGenerationOptions(D, Args);
unsigned FunctionAlignment = ParseFunctionAlignment(TC, Args);
assert(FunctionAlignment <= 31 && "function alignment will be truncated!");
if (FunctionAlignment) {
CmdArgs.push_back("-function-alignment");
CmdArgs.push_back(Args.MakeArgString(std::to_string(FunctionAlignment)));
}
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args);
bool IsROPI = RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
bool IsRWPI = RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI;
if (Args.hasArg(options::OPT_mcmse) &&
!Args.hasArg(options::OPT_fallow_unsupported)) {
if (IsROPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << IsROPI;
if (IsRWPI)
D.Diag(diag::err_cmse_pi_are_incompatible) << !IsRWPI;
}
if (IsROPI && types::isCXX(Input.getType()) &&
!Args.hasArg(options::OPT_fallow_unsupported))
D.Diag(diag::err_drv_ropi_incompatible_with_cxx);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
if (PICLevel > 0) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back(PICLevel == 1 ? "1" : "2");
if (IsPIE)
CmdArgs.push_back("-pic-is-pie");
}
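// Illustrative note (editorial): with a PIC level of 2 and PIE enabled, the
// block above yields "-pic-level 2 -pic-is-pie" in addition to the
// "-mrelocation-model" value chosen from ParsePICArgs; PIC level 0 emits
// neither of the two pic flags.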
if (RelocationModel == llvm::Reloc::ROPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-fropi");
if (RelocationModel == llvm::Reloc::RWPI ||
RelocationModel == llvm::Reloc::ROPI_RWPI)
CmdArgs.push_back("-frwpi");
if (Arg *A = Args.getLastArg(options::OPT_meabi)) {
CmdArgs.push_back("-meabi");
CmdArgs.push_back(A->getValue());
}
// -fsemantic-interposition is forwarded to CC1: set the
// "SemanticInterposition" metadata to 1 (make some linkages interposable) and
// make default visibility external linkage definitions dso_preemptable.
//
// -fno-semantic-interposition: if the target supports .Lfoo$local local
// aliases (make default visibility external linkage definitions dso_local).
// This is the CC1 default for ELF to match COFF/Mach-O.
//
// Otherwise use Clang's traditional behavior: like
// -fno-semantic-interposition but local aliases are not used. So references
// can be interposed if not optimized out.
if (Triple.isOSBinFormatELF()) {
Arg *A = Args.getLastArg(options::OPT_fsemantic_interposition,
options::OPT_fno_semantic_interposition);
if (RelocationModel != llvm::Reloc::Static && !IsPIE) {
// The supported targets need to call AsmPrinter::getSymbolPreferLocal.
bool SupportsLocalAlias =
Triple.isAArch64() || Triple.isRISCV() || Triple.isX86();
if (!A)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
else if (A->getOption().matches(options::OPT_fsemantic_interposition))
A->render(Args, CmdArgs);
else if (!SupportsLocalAlias)
CmdArgs.push_back("-fhalf-no-semantic-interposition");
}
}
{
std::string Model;
if (Arg *A = Args.getLastArg(options::OPT_mthread_model)) {
if (!TC.isThreadModelSupported(A->getValue()))
D.Diag(diag::err_drv_invalid_thread_model_for_target)
<< A->getValue() << A->getAsString(Args);
Model = A->getValue();
} else
Model = TC.getThreadModel();
if (Model != "posix") {
CmdArgs.push_back("-mthread-model");
CmdArgs.push_back(Args.MakeArgString(Model));
}
}
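// Illustrative note (editorial): the default "posix" thread model is left
// implicit; only a non-default model that the toolchain supports (e.g. an
// assumed "-mthread-model single") is forwarded as "-mthread-model single".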
Args.AddLastArg(CmdArgs, options::OPT_fveclib);
if (Args.hasFlag(options::OPT_fmerge_all_constants,
options::OPT_fno_merge_all_constants, false))
CmdArgs.push_back("-fmerge-all-constants");
if (Args.hasFlag(options::OPT_fno_delete_null_pointer_checks,
options::OPT_fdelete_null_pointer_checks, false))
CmdArgs.push_back("-fno-delete-null-pointer-checks");
// LLVM Code Generator Options.
for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file_EQ)) {
StringRef Map = A->getValue();
if (!llvm::sys::fs::exists(Map)) {
D.Diag(diag::err_drv_no_such_file) << Map;
} else {
A->render(Args, CmdArgs);
A->claim();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
options::OPT_mabi_EQ_vec_default)) {
if (!Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
CmdArgs.push_back("-mabi=vec-extabi");
else
CmdArgs.push_back("-mabi=vec-default");
}
if (Arg *A = Args.getLastArg(options::OPT_mlong_double_128)) {
// Emit the unsupported-option error until Clang's library integration
// support for 128-bit long double is available for AIX.
if (Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
}
if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) {
StringRef v = A->getValue();
// FIXME: Validate the argument here so we don't produce meaningless errors
// about -fwarn-stack-size=.
if (v.empty())
D.Diag(diag::err_drv_missing_argument) << A->getSpelling() << 1;
else
CmdArgs.push_back(Args.MakeArgString("-fwarn-stack-size=" + v));
A->claim();
}
if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables,
true))
CmdArgs.push_back("-fno-jump-tables");
if (Args.hasFlag(options::OPT_fprofile_sample_accurate,
options::OPT_fno_profile_sample_accurate, false))
CmdArgs.push_back("-fprofile-sample-accurate");
if (!Args.hasFlag(options::OPT_fpreserve_as_comments,
options::OPT_fno_preserve_as_comments, true))
CmdArgs.push_back("-fno-preserve-as-comments");
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
CmdArgs.push_back("-mregparm");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_maix_struct_return,
options::OPT_msvr4_struct_return)) {
if (!TC.getTriple().isPPC32()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_maix_struct_return)) {
CmdArgs.push_back("-maix-struct-return");
} else {
assert(A->getOption().matches(options::OPT_msvr4_struct_return));
CmdArgs.push_back("-msvr4-struct-return");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fpcc_struct_return,
options::OPT_freg_struct_return)) {
if (TC.getArch() != llvm::Triple::x86) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
} else if (A->getOption().matches(options::OPT_fpcc_struct_return)) {
CmdArgs.push_back("-fpcc-struct-return");
} else {
assert(A->getOption().matches(options::OPT_freg_struct_return));
CmdArgs.push_back("-freg-struct-return");
}
}
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
CmdArgs.push_back("-fdefault-calling-conv=stdcall");
if (Args.hasArg(options::OPT_fenable_matrix)) {
// -fenable-matrix is needed by both the LangOpts and LLVM.
CmdArgs.push_back("-fenable-matrix");
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-matrix");
}
CodeGenOptions::FramePointerKind FPKeepKind =
getFramePointerKind(Args, RawTriple);
const char *FPKeepKindStr = nullptr;
switch (FPKeepKind) {
case CodeGenOptions::FramePointerKind::None:
FPKeepKindStr = "-mframe-pointer=none";
break;
case CodeGenOptions::FramePointerKind::NonLeaf:
FPKeepKindStr = "-mframe-pointer=non-leaf";
break;
case CodeGenOptions::FramePointerKind::All:
FPKeepKindStr = "-mframe-pointer=all";
break;
}
assert(FPKeepKindStr && "unknown FramePointerKind");
CmdArgs.push_back(FPKeepKindStr);
if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss,
options::OPT_fno_zero_initialized_in_bss, true))
CmdArgs.push_back("-fno-zero-initialized-in-bss");
bool OFastEnabled = isOptimizationLevelFast(Args);
// If -Ofast is the optimization level, then -fstrict-aliasing should be
// enabled. This alias option is being used to simplify the hasFlag logic.
OptSpecifier StrictAliasingAliasOption =
OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing;
// We turn strict aliasing off by default if we're in CL mode, since MSVC
// doesn't do any TBAA.
bool TBAAOnByDefault = !D.IsCLMode();
if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption,
options::OPT_fno_strict_aliasing, TBAAOnByDefault))
CmdArgs.push_back("-relaxed-aliasing");
if (!Args.hasFlag(options::OPT_fstruct_path_tbaa,
options::OPT_fno_struct_path_tbaa))
CmdArgs.push_back("-no-struct-path-tbaa");
if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
false))
CmdArgs.push_back("-fstrict-enums");
if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return,
true))
CmdArgs.push_back("-fno-strict-return");
if (Args.hasFlag(options::OPT_fallow_editor_placeholders,
options::OPT_fno_allow_editor_placeholders, false))
CmdArgs.push_back("-fallow-editor-placeholders");
if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
options::OPT_fno_strict_vtable_pointers,
false))
CmdArgs.push_back("-fstrict-vtable-pointers");
if (Args.hasFlag(options::OPT_fforce_emit_vtables,
options::OPT_fno_force_emit_vtables,
false))
CmdArgs.push_back("-fforce-emit-vtables");
if (!Args.hasFlag(options::OPT_foptimize_sibling_calls,
options::OPT_fno_optimize_sibling_calls))
CmdArgs.push_back("-mdisable-tail-calls");
if (Args.hasFlag(options::OPT_fno_escaping_block_tail_calls,
options::OPT_fescaping_block_tail_calls, false))
CmdArgs.push_back("-fno-escaping-block-tail-calls");
Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
options::OPT_fno_fine_grained_bitfield_accesses);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
// Handle segmented stacks.
if (Args.hasFlag(options::OPT_fsplit_stack, options::OPT_fno_split_stack,
false))
CmdArgs.push_back("-fsplit-stack");
// -fprotect-parens=0 is default.
if (Args.hasFlag(options::OPT_fprotect_parens,
options::OPT_fno_protect_parens, false))
CmdArgs.push_back("-fprotect-parens");
RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
const llvm::Triple::ArchType Arch = TC.getArch();
if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
StringRef V = A->getValue();
if (V == "64")
CmdArgs.push_back("-fextend-arguments=64");
else if (V != "32")
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
} else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
if (TC.getArch() == llvm::Triple::avr)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_LongDouble_Group)) {
if (TC.getTriple().isX86())
A->render(Args, CmdArgs);
else if (TC.getTriple().isPPC() &&
(A->getOption().getID() != options::OPT_mlong_double_80))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
// Decide whether to use verbose asm. Verbose assembly is the default on
// toolchains which have the integrated assembler on by default.
bool IsIntegratedAssemblerDefault = TC.IsIntegratedAssemblerDefault();
if (!Args.hasFlag(options::OPT_fverbose_asm, options::OPT_fno_verbose_asm,
IsIntegratedAssemblerDefault))
CmdArgs.push_back("-fno-verbose-asm");
// Parse 'none' or '$major.$minor'. Disallow -fbinutils-version=0 because we
// use that to indicate the MC default in the backend.
if (Arg *A = Args.getLastArg(options::OPT_fbinutils_version_EQ)) {
StringRef V = A->getValue();
unsigned Num;
if (V == "none")
A->render(Args, CmdArgs);
else if (!V.consumeInteger(10, Num) && Num > 0 &&
(V.empty() || (V.consume_front(".") &&
!V.consumeInteger(10, Num) && V.empty())))
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_invalid_argument_to_option)
<< A->getValue() << A->getOption().getName();
}
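// Illustrative note (editorial): the parse above accepts "none", a bare major
// version such as "2", or "$major.$minor" such as "2.35" (assumed example
// values); "0", a trailing ".", or any other form is rejected with
// err_drv_invalid_argument_to_option.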
// If the toolchain chooses to use MCAsmParser for inline asm, don't explicitly
// pass the option to disable the integrated assembler.
if (!TC.useIntegratedAs() && !TC.parseInlineAsmUsingAsmParser())
CmdArgs.push_back("-no-integrated-as");
if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Structure");
}
if (Args.hasArg(options::OPT_fdebug_pass_arguments)) {
CmdArgs.push_back("-mdebug-pass");
CmdArgs.push_back("Arguments");
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
// where aliases aren't supported.
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
// try to use them.
if (KernelOrKext && RawTriple.isOSDarwin())
CmdArgs.push_back("-fforbid-guard-variables");
if (Args.hasFlag(options::OPT_mms_bitfields, options::OPT_mno_ms_bitfields,
Triple.isWindowsGNUEnvironment())) {
CmdArgs.push_back("-mms-bitfields");
}
// Non-PIC code defaults to -fdirect-access-external-data while PIC code
// defaults to -fno-direct-access-external-data. Pass the option if different
// from the default.
if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data,
options::OPT_fno_direct_access_external_data))
if (A->getOption().matches(options::OPT_fdirect_access_external_data) !=
(PICLevel == 0))
A->render(Args, CmdArgs);
if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) {
CmdArgs.push_back("-fno-plt");
}
// -fhosted is default.
// TODO: Audit uses of KernelOrKext and see where it'd be more appropriate to
// use Freestanding.
bool Freestanding =
Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) ||
KernelOrKext;
if (Freestanding)
CmdArgs.push_back("-ffreestanding");
// This is a coarse approximation of what llvm-gcc actually does;
// -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more
// complicated ways.
bool UnwindTables =
Args.hasFlag(options::OPT_fasynchronous_unwind_tables,
options::OPT_fno_asynchronous_unwind_tables,
(TC.IsUnwindTablesDefault(Args) ||
TC.getSanitizerArgs().needsUnwindTables()) &&
!Freestanding);
UnwindTables = Args.hasFlag(options::OPT_funwind_tables,
options::OPT_fno_unwind_tables, UnwindTables);
if (UnwindTables)
CmdArgs.push_back("-munwind-tables");
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
// `--gpu-use-aux-triple-only` is specified.
if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
(IsCudaDevice || IsHIPDevice)) {
const ArgList &HostArgs =
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
std::string HostCPU =
getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
if (!HostCPU.empty()) {
CmdArgs.push_back("-aux-target-cpu");
CmdArgs.push_back(Args.MakeArgString(HostCPU));
}
getTargetFeatures(D, *TC.getAuxTriple(), HostArgs, CmdArgs,
/*ForAS*/ false, /*IsAux*/ true);
}
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// FIXME: Handle -mtune=.
(void)Args.hasArg(options::OPT_mtune_EQ);
if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
StringRef CM = A->getValue();
if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
CM == "tiny") {
if (Triple.isOSAIX() && CM == "medium")
CmdArgs.push_back("-mcmodel=large");
else
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< CM << A->getOption().getName();
}
}
if (Arg *A = Args.getLastArg(options::OPT_mtls_size_EQ)) {
StringRef Value = A->getValue();
unsigned TLSSize = 0;
Value.getAsInteger(10, TLSSize);
if (!Triple.isAArch64() || !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getOption().getName() << TripleStr;
if (TLSSize != 12 && TLSSize != 24 && TLSSize != 32 && TLSSize != 48)
D.Diag(diag::err_drv_invalid_int_value)
<< A->getOption().getName() << Value;
Args.AddLastArg(CmdArgs, options::OPT_mtls_size_EQ);
}
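// Illustrative note (editorial): -mtls-size= is only supported on AArch64 ELF
// targets, and only the values 12, 24, 32, and 48 pass the check above; any
// other value produces err_drv_invalid_int_value.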
// Add the target cpu
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
RenderTargetOptions(Triple, Args, KernelOrKext, CmdArgs);
// FIXME: For now we want to demote any errors to warnings, when they have
// been raised for asking the wrong question of scalable vectors, such as
// asking for the fixed number of elements. This may happen because code that
// is not yet ported to work for scalable vectors uses the wrong interfaces,
// whereas the behaviour is actually correct. Emitting a warning helps bring
// up scalable vector support in an incremental way. When scalable vector
// support is stable enough, all uses of wrong interfaces should be considered
// as errors, but until then, we can live with a warning being emitted by the
// compiler. This way, Clang can be used to compile code with scalable vectors
// and identify possible issues.
if (isa<BackendJobAction>(JA)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
}
// These two are potentially updated by AddClangCLArgs.
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
bool EmitCodeView = false;
// Add clang-cl arguments.
types::ID InputType = Input.getType();
if (D.IsCLMode())
AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView);
DwarfFissionKind DwarfFission = DwarfFissionKind::None;
renderDebugOptions(TC, D, RawTriple, Args, EmitCodeView,
types::isLLVMIR(InputType), CmdArgs, DebugInfoKind,
DwarfFission);
// Add the split debug info name to the command lines here so we
// can propagate it to the backend.
bool SplitDWARF = (DwarfFission != DwarfFissionKind::None) &&
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatWasm()) &&
(isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
isa<BackendJobAction>(JA));
if (SplitDWARF) {
const char *SplitDWARFOut = SplitDebugName(JA, Args, Input, Output);
CmdArgs.push_back("-split-dwarf-file");
CmdArgs.push_back(SplitDWARFOut);
if (DwarfFission == DwarfFissionKind::Split) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDWARFOut);
}
}
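// Illustrative note (editorial): when DWARF fission is requested on an ELF or
// Wasm target, cc1 always receives "-split-dwarf-file <name>"; the additional
// "-split-dwarf-output <name>" is added only for full split mode
// (DwarfFissionKind::Split), not for single-file fission.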
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
CmdArgs.push_back("-target-linker-version");
CmdArgs.push_back(A->getValue());
}
// Explicitly error on some things we know we don't support and can't just
// ignore.
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
Arg *Unsupported;
if (types::isCXX(InputType) && RawTriple.isOSDarwin() &&
TC.getArch() == llvm::Triple::x86) {
if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) ||
(Unsupported = Args.getLastArg(options::OPT_mkernel)))
D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386)
<< Unsupported->getOption().getName();
}
// The faltivec option has been superseded by the maltivec option.
if ((Unsupported = Args.getLastArg(options::OPT_faltivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName()
<< "please use -maltivec and include altivec.h explicitly";
if ((Unsupported = Args.getLastArg(options::OPT_fno_altivec)))
D.Diag(diag::err_drv_clang_unsupported_opt_faltivec)
<< Unsupported->getOption().getName() << "please use -mno-altivec";
}
Args.AddAllArgs(CmdArgs, options::OPT_v);
if (Args.getLastArg(options::OPT_H)) {
CmdArgs.push_back("-H");
CmdArgs.push_back("-sys-header-deps");
}
Args.AddAllArgs(CmdArgs, options::OPT_fshow_skipped_includes);
if (D.CCPrintHeaders && !D.CCGenDiagnostics) {
CmdArgs.push_back("-header-include-file");
CmdArgs.push_back(!D.CCPrintHeadersFilename.empty()
? D.CCPrintHeadersFilename.c_str()
: "-");
CmdArgs.push_back("-sys-header-deps");
}
Args.AddLastArg(CmdArgs, options::OPT_P);
Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout);
if (D.CCLogDiagnostics && !D.CCGenDiagnostics) {
CmdArgs.push_back("-diagnostic-log-file");
CmdArgs.push_back(!D.CCLogDiagnosticsFilename.empty()
? D.CCLogDiagnosticsFilename.c_str()
: "-");
}
// Give the gen diagnostics more chances to succeed, by avoiding intentional
// crashes.
if (D.CCGenDiagnostics)
CmdArgs.push_back("-disable-pragma-debug-crash");
// Allow backend to put its diagnostic files in the same place as frontend
// crash diagnostics files.
if (Args.hasArg(options::OPT_fcrash_diagnostics_dir)) {
StringRef Dir = Args.getLastArgValue(options::OPT_fcrash_diagnostics_dir);
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-crash-diagnostics-dir=" + Dir));
}
bool UseSeparateSections = isUseSeparateSections(Triple);
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-ffunction-sections");
}
if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_sections_EQ)) {
StringRef Val = A->getValue();
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (Val != "all" && Val != "labels" && Val != "none" &&
!Val.startswith("list="))
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
else
A->render(Args, CmdArgs);
} else if (Triple.isNVPTX()) {
// Do not pass the option to the GPU compilation. We still want it enabled
// for the host-side compilation, so seeing it here is not an error.
} else if (Val != "none") {
// =none is allowed everywhere. It's useful for overriding the option
// and is the same as not specifying the option.
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
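// Illustrative note (editorial): on x86 ELF targets the accepted values for
// -fbasic-block-sections= are "all", "labels", "none", and "list=<file>";
// NVPTX silently drops the option, and every other target only tolerates
// "=none".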
bool HasDefaultDataSections = Triple.isOSBinFormatXCOFF();
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections || HasDefaultDataSections)) {
CmdArgs.push_back("-fdata-sections");
}
if (!Args.hasFlag(options::OPT_funique_section_names,
options::OPT_fno_unique_section_names, true))
CmdArgs.push_back("-fno-unique-section-names");
if (Args.hasFlag(options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names, false))
CmdArgs.push_back("-funique-internal-linkage-names");
if (Args.hasFlag(options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names, false))
CmdArgs.push_back("-funique-basic-block-section-names");
if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
// This codegen pass is only available on x86-elf targets.
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if (A->getOption().matches(options::OPT_fsplit_machine_functions))
A->render(Args, CmdArgs);
} else {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, the overhead of call-arc collection is way too high, and
// there's no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
addPGOAndCoverageFlags(TC, C, D, Output, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
// Add runtime flag for PS4 when PGO, coverage, or sanitizers are enabled.
if (RawTriple.isPS4CPU() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
PS4cpu::addProfileRTArgs(TC, Args, CmdArgs);
PS4cpu::addSanitizerArgs(TC, CmdArgs);
}
// Pass options for controlling the default header search paths.
if (Args.hasArg(options::OPT_nostdinc)) {
CmdArgs.push_back("-nostdsysteminc");
CmdArgs.push_back("-nobuiltininc");
} else {
if (Args.hasArg(options::OPT_nostdlibinc))
CmdArgs.push_back("-nostdsysteminc");
Args.AddLastArg(CmdArgs, options::OPT_nostdincxx);
Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc);
}
// Pass the path to compiler resource files.
CmdArgs.push_back("-resource-dir");
CmdArgs.push_back(D.ResourceDir.c_str());
Args.AddLastArg(CmdArgs, options::OPT_working_directory);
RenderARCMigrateToolOptions(D, Args, CmdArgs);
// Add preprocessing options like -I, -D, etc. if we are using the
// preprocessor.
//
// FIXME: Support -fpreprocessed
if (types::getPreprocessedType(InputType) != types::TY_INVALID)
AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
// Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
// that "The compiler can only warn and ignore the option if not recognized".
// When building with ccache, it will pass -D options to clang even on
// preprocessed inputs and configure concludes that -fPIC is not supported.
Args.ClaimAllArgs(options::OPT_D);
// Manually translate -O4 to -O3; let clang reject others.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4)) {
CmdArgs.push_back("-O3");
D.Diag(diag::warn_O4_is_O3);
} else {
A->render(Args, CmdArgs);
}
}
// Warn about ignored options to clang.
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) {
D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args);
A->claim();
}
for (const Arg *A :
Args.filtered(options::OPT_clang_ignored_legacy_options_Group)) {
D.Diag(diag::warn_ignored_clang_option) << A->getAsString(Args);
A->claim();
}
claimNoWarnArgs(Args);
Args.AddAllArgs(CmdArgs, options::OPT_R_Group);
Args.AddAllArgs(CmdArgs, options::OPT_W_Group);
if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false))
CmdArgs.push_back("-pedantic");
Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors);
Args.AddLastArg(CmdArgs, options::OPT_w);
// Fixed point flags
if (Args.hasFlag(options::OPT_ffixed_point, options::OPT_fno_fixed_point,
/*Default=*/false))
Args.AddLastArg(CmdArgs, options::OPT_ffixed_point);
if (Arg *A = Args.getLastArg(options::OPT_fcxx_abi_EQ))
A->render(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
// Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi}
// (-ansi is equivalent to -std=c89 or -std=c++98).
//
// If a std is supplied, only add -trigraphs if it follows the
// option.
bool ImplyVCPPCVer = false;
bool ImplyVCPPCXXVer = false;
const Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi);
if (Std) {
if (Std->getOption().matches(options::OPT_ansi))
if (types::isCXX(InputType))
CmdArgs.push_back("-std=c++98");
else
CmdArgs.push_back("-std=c89");
else
Std->render(Args, CmdArgs);
// If -f(no-)trigraphs appears after the language standard flag, honor it.
if (Arg *A = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi,
options::OPT_ftrigraphs,
options::OPT_fno_trigraphs))
if (A != Std)
A->render(Args, CmdArgs);
} else {
// Honor -std-default.
//
// FIXME: Clang doesn't correctly handle -std= when the input language
// doesn't match. For the time being just ignore this for C++ inputs;
// eventually we want to do all the standard defaulting here instead of
// splitting it between the driver and clang -cc1.
if (!types::isCXX(InputType)) {
if (!Args.hasArg(options::OPT__SLASH_std)) {
Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=",
/*Joined=*/true);
} else
ImplyVCPPCVer = true;
}
else if (IsWindowsMSVC)
ImplyVCPPCXXVer = true;
Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs,
options::OPT_fno_trigraphs);
// HIP headers have minimum C++ standard requirements, so set the default
// language standard accordingly.
if (IsHIP)
CmdArgs.push_back(IsWindowsMSVC ? "-std=c++14" : "-std=c++11");
}
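// Illustrative note (editorial): when no -std= or -ansi is given, a HIP
// compile defaults to "-std=c++14" under MSVC and "-std=c++11" elsewhere, per
// the branch above.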
// GCC's behavior for -Wwrite-strings is a bit strange:
// * In C, this "warning flag" changes the types of string literals from
// 'char[N]' to 'const char[N]', and thus triggers an unrelated warning
// for the discarded qualifier.
// * In C++, this is just a normal warning flag.
//
// Implementing this warning correctly in C is hard, so we follow GCC's
// behavior for now. FIXME: Directly diagnose uses of a string literal as
// a non-const char* in C, rather than using this crude hack.
if (!types::isCXX(InputType)) {
// FIXME: This should behave just like a warning flag, and thus should also
// respect -Weverything, -Wno-everything, -Werror=write-strings, and so on.
Arg *WriteStrings =
Args.getLastArg(options::OPT_Wwrite_strings,
options::OPT_Wno_write_strings, options::OPT_w);
if (WriteStrings &&
WriteStrings->getOption().matches(options::OPT_Wwrite_strings))
CmdArgs.push_back("-fconst-strings");
}
// GCC provides a macro definition '__DEPRECATED' when -Wdeprecated is active
// during C++ compilation, which it is by default. GCC keeps this define even
// in the presence of '-w'; match this behavior bug-for-bug.
if (types::isCXX(InputType) &&
Args.hasFlag(options::OPT_Wdeprecated, options::OPT_Wno_deprecated,
true)) {
CmdArgs.push_back("-fdeprecated-macro");
}
// Translate GCC's misnamed '-fasm' argument to '-fgnu-keywords'.
if (Arg *Asm = Args.getLastArg(options::OPT_fasm, options::OPT_fno_asm)) {
if (Asm->getOption().matches(options::OPT_fasm))
CmdArgs.push_back("-fgnu-keywords");
else
CmdArgs.push_back("-fno-gnu-keywords");
}
if (!ShouldEnableAutolink(Args, TC, JA))
CmdArgs.push_back("-fno-autolink");
// Add in -fdebug-compilation-dir if necessary.
addDebugCompDirArg(Args, CmdArgs, D.getVFS());
addDebugPrefixMapArg(D, Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_depth_,
options::OPT_ftemplate_depth_EQ)) {
CmdArgs.push_back("-ftemplate-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_foperator_arrow_depth_EQ)) {
CmdArgs.push_back("-foperator-arrow-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_depth_EQ)) {
CmdArgs.push_back("-fconstexpr-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_steps_EQ)) {
CmdArgs.push_back("-fconstexpr-steps");
CmdArgs.push_back(A->getValue());
}
if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter))
CmdArgs.push_back("-fexperimental-new-constant-interpreter");
if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) {
CmdArgs.push_back("-fbracket-depth");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ,
options::OPT_Wlarge_by_value_copy_def)) {
if (A->getNumValues()) {
StringRef bytes = A->getValue();
CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes));
} else
CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value
}
if (Args.hasArg(options::OPT_relocatable_pch))
CmdArgs.push_back("-relocatable-pch");
if (const Arg *A = Args.getLastArg(options::OPT_fcf_runtime_abi_EQ)) {
static const char *kCFABIs[] = {
"standalone", "objc", "swift", "swift-5.0", "swift-4.2", "swift-4.1",
};
if (find(kCFABIs, StringRef(A->getValue())) == std::end(kCFABIs))
D.Diag(diag::err_drv_invalid_cf_runtime_abi) << A->getValue();
else
A->render(Args, CmdArgs);
}
if (Arg *A = Args.getLastArg(options::OPT_fconstant_string_class_EQ)) {
CmdArgs.push_back("-fconstant-string-class");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftabstop_EQ)) {
CmdArgs.push_back("-ftabstop");
CmdArgs.push_back(A->getValue());
}
if (Args.hasFlag(options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section, RawTriple.isPS4()))
CmdArgs.push_back("-fstack-size-section");
if (Args.hasArg(options::OPT_fstack_usage)) {
CmdArgs.push_back("-stack-usage-file");
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, "su");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else
CmdArgs.push_back(
Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".su"));
}
CmdArgs.push_back("-ferror-limit");
if (Arg *A = Args.getLastArg(options::OPT_ferror_limit_EQ))
CmdArgs.push_back(A->getValue());
else
CmdArgs.push_back("19");
if (Arg *A = Args.getLastArg(options::OPT_fmacro_backtrace_limit_EQ)) {
CmdArgs.push_back("-fmacro-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_ftemplate_backtrace_limit_EQ)) {
CmdArgs.push_back("-ftemplate-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fconstexpr_backtrace_limit_EQ)) {
CmdArgs.push_back("-fconstexpr-backtrace-limit");
CmdArgs.push_back(A->getValue());
}
if (Arg *A = Args.getLastArg(options::OPT_fspell_checking_limit_EQ)) {
CmdArgs.push_back("-fspell-checking-limit");
CmdArgs.push_back(A->getValue());
}
// Pass -fmessage-length=.
unsigned MessageLength = 0;
if (Arg *A = Args.getLastArg(options::OPT_fmessage_length_EQ)) {
StringRef V(A->getValue());
if (V.getAsInteger(0, MessageLength))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< V << A->getOption().getName();
} else {
// If -fmessage-length=N was not specified, determine whether this is a
// terminal and, if so, implicitly define -fmessage-length appropriately.
MessageLength = llvm::sys::Process::StandardErrColumns();
}
if (MessageLength != 0)
CmdArgs.push_back(
Args.MakeArgString("-fmessage-length=" + Twine(MessageLength)));
// -fvisibility= and -fvisibility-ms-compat are of a piece.
if (const Arg *A = Args.getLastArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
if (A->getOption().matches(options::OPT_fvisibility_EQ)) {
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back(A->getValue());
} else {
assert(A->getOption().matches(options::OPT_fvisibility_ms_compat));
CmdArgs.push_back("-fvisibility");
CmdArgs.push_back("hidden");
CmdArgs.push_back("-ftype-visibility");
CmdArgs.push_back("default");
}
}
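// Illustrative note (editorial): -fvisibility-ms-compat expands to the pair
// "-fvisibility hidden" plus "-ftype-visibility default", whereas
// -fvisibility=<value> is forwarded as "-fvisibility <value>".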
if (!RawTriple.isPS4())
if (const Arg *A =
Args.getLastArg(options::OPT_fvisibility_from_dllstorageclass,
options::OPT_fno_visibility_from_dllstorageclass)) {
if (A->getOption().matches(
options::OPT_fvisibility_from_dllstorageclass)) {
CmdArgs.push_back("-fvisibility-from-dllstorageclass");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_dllexport_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_nodllstorageclass_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_externs_dllimport_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fvisibility_externs_nodllstorageclass_EQ);
}
}
if (const Arg *A = Args.getLastArg(options::OPT_mignore_xcoff_visibility)) {
if (Triple.isOSAIX())
CmdArgs.push_back("-mignore-xcoff-visibility");
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Args.hasFlag(options::OPT_fvisibility_inlines_hidden,
options::OPT_fno_visibility_inlines_hidden, false))
CmdArgs.push_back("-fvisibility-inlines-hidden");
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
options::OPT_fno_visibility_inlines_hidden_static_local_var);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
if (Args.hasFlag(options::OPT_fno_operator_names,
options::OPT_foperator_names, false))
CmdArgs.push_back("-fno-operator-names");
// Forward -f (flag) options which we can pass directly.
Args.AddLastArg(CmdArgs, options::OPT_femit_all_decls);
Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions);
Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs);
Args.AddLastArg(CmdArgs, options::OPT_femulated_tls,
options::OPT_fno_emulated_tls);
// AltiVec-like language extensions aren't relevant for assembling.
if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm)
Args.AddLastArg(CmdArgs, options::OPT_fzvector);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
// Forward flags for OpenMP. We don't do this if the current action is a
// device offloading action other than OpenMP.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {
switch (D.getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
case Driver::OMPRT_IOMP5:
// Clang can generate useful OpenMP code for these two runtime libraries.
CmdArgs.push_back("-fopenmp");
// If no option regarding the use of TLS in OpenMP code generation is
// given, decide a default based on the target. Otherwise rely on the
// options and pass the right information to the frontend.
if (!Args.hasFlag(options::OPT_fopenmp_use_tls,
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
CmdArgs.push_back("-fnoopenmp-use-tls");
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ);
Args.AddAllArgs(CmdArgs,
options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ);
if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse,
options::OPT_fno_openmp_optimistic_collapse,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-optimistic-collapse");
// When in OpenMP offloading mode with an NVPTX target, forward the
// cuda-mode flag.
if (Args.hasFlag(options::OPT_fopenmp_cuda_mode,
options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-mode");
// When in OpenMP offloading mode with an NVPTX target, check whether the
// full runtime is required.
if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime,
options::OPT_fno_openmp_cuda_force_full_runtime,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
// for a specific runtime library, we just don't pass the '-fopenmp' flag
// down to the actual compilation.
// FIXME: It would be better to have a mode which *only* omits IR
// generation based on the OpenMP support so that we get consistent
// semantic analysis, etc.
break;
}
} else {
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
}
const SanitizerArgs &Sanitize = TC.getSanitizerArgs();
Sanitize.addArgs(TC, Args, CmdArgs, InputType);
const XRayArgs &XRay = TC.getXRayArgs();
XRay.addArgs(TC, Args, CmdArgs, InputType);
for (const auto &Filename :
Args.getAllArgValues(options::OPT_fprofile_list_EQ)) {
if (D.getVFS().exists(Filename))
CmdArgs.push_back(Args.MakeArgString("-fprofile-list=" + Filename));
else
D.Diag(clang::diag::err_drv_no_such_file) << Filename;
}
if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ)) {
StringRef S0 = A->getValue(), S = S0;
unsigned Size, Offset = 0;
if (!Triple.isAArch64() && !Triple.isRISCV() && !Triple.isX86())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
else if (S.consumeInteger(10, Size) ||
(!S.empty() && (!S.consume_front(",") ||
S.consumeInteger(10, Offset) || !S.empty())))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< S0 << A->getOption().getName();
else if (Size < Offset)
D.Diag(diag::err_drv_unsupported_fpatchable_function_entry_argument);
else {
CmdArgs.push_back(Args.MakeArgString(A->getSpelling() + Twine(Size)));
CmdArgs.push_back(Args.MakeArgString(
"-fpatchable-function-entry-offset=" + Twine(Offset)));
}
}
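// Illustrative note (editorial): -fpatchable-function-entry=N[,M] is only
// accepted on AArch64, RISC-V, and x86; a well-formed value with N >= M is
// split into "-fpatchable-function-entry=N" and
// "-fpatchable-function-entry-offset=M".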
if (TC.SupportsProfiling()) {
Args.AddLastArg(CmdArgs, options::OPT_pg);
llvm::Triple::ArchType Arch = TC.getArch();
if (Arg *A = Args.getLastArg(options::OPT_mfentry)) {
if (Arch == llvm::Triple::systemz || TC.getTriple().isX86())
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mnop_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
if (Arg *A = Args.getLastArg(options::OPT_mrecord_mcount)) {
if (Arch == llvm::Triple::systemz)
A->render(Args, CmdArgs);
else
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
}
}
if (Args.getLastArg(options::OPT_fapple_kext) ||
(Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType)))
CmdArgs.push_back("-fapple-kext");
Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat);
Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ);
Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info);
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report);
Args.AddLastArg(CmdArgs, options::OPT_ftime_report_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace);
Args.AddLastArg(CmdArgs, options::OPT_ftime_trace_granularity_EQ);
Args.AddLastArg(CmdArgs, options::OPT_ftrapv);
Args.AddLastArg(CmdArgs, options::OPT_malign_double);
Args.AddLastArg(CmdArgs, options::OPT_fno_temp_file);
if (Arg *A = Args.getLastArg(options::OPT_ftrapv_handler_EQ)) {
CmdArgs.push_back("-ftrapv-handler");
CmdArgs.push_back(A->getValue());
}
Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ);
// -fno-strict-overflow implies -fwrapv if it isn't disabled, but
// -fstrict-overflow won't turn off an explicitly enabled -fwrapv.
if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) {
if (A->getOption().matches(options::OPT_fwrapv))
CmdArgs.push_back("-fwrapv");
} else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow,
options::OPT_fno_strict_overflow)) {
if (A->getOption().matches(options::OPT_fno_strict_overflow))
CmdArgs.push_back("-fwrapv");
}
if (Arg *A = Args.getLastArg(options::OPT_freroll_loops,
options::OPT_fno_reroll_loops))
if (A->getOption().matches(options::OPT_freroll_loops))
CmdArgs.push_back("-freroll-loops");
Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
options::OPT_fno_finite_loops);
Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_pthread);
if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening, false))
CmdArgs.push_back(Args.MakeArgString("-mspeculative-load-hardening"));
RenderSSPOptions(D, TC, Args, CmdArgs, KernelOrKext);
RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
// Translate -mstackrealign
if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign,
false))
CmdArgs.push_back(Args.MakeArgString("-mstackrealign"));
if (Args.hasArg(options::OPT_mstack_alignment)) {
StringRef alignment = Args.getLastArgValue(options::OPT_mstack_alignment);
CmdArgs.push_back(Args.MakeArgString("-mstack-alignment=" + alignment));
}
if (Args.hasArg(options::OPT_mstack_probe_size)) {
StringRef Size = Args.getLastArgValue(options::OPT_mstack_probe_size);
if (!Size.empty())
CmdArgs.push_back(Args.MakeArgString("-mstack-probe-size=" + Size));
else
CmdArgs.push_back("-mstack-probe-size=0");
}
if (!Args.hasFlag(options::OPT_mstack_arg_probe,
options::OPT_mno_stack_arg_probe, true))
CmdArgs.push_back(Args.MakeArgString("-mno-stack-arg-probe"));
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
options::OPT_mno_restrict_it)) {
if (A->getOption().matches(options::OPT_mrestrict_it)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-restrict-it");
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-no-restrict-it");
}
} else if (Triple.isOSWindows() &&
(Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
// Windows on ARM expects restricted IT blocks
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-restrict-it");
}
// Forward -cl options to -cc1
RenderOpenCLOptions(Args, CmdArgs, InputType);
if (IsHIP) {
if (Args.hasFlag(options::OPT_fhip_new_launch_api,
options::OPT_fno_hip_new_launch_api, true))
CmdArgs.push_back("-fhip-new-launch-api");
if (Args.hasFlag(options::OPT_fgpu_allow_device_init,
options::OPT_fno_gpu_allow_device_init, false))
CmdArgs.push_back("-fgpu-allow-device-init");
}
if (IsCuda || IsHIP) {
if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
CmdArgs.push_back("-fgpu-rdc");
if (Args.hasFlag(options::OPT_fgpu_defer_diag,
options::OPT_fno_gpu_defer_diag, false))
CmdArgs.push_back("-fgpu-defer-diag");
if (Args.hasFlag(options::OPT_fgpu_exclude_wrong_side_overloads,
options::OPT_fno_gpu_exclude_wrong_side_overloads,
false)) {
CmdArgs.push_back("-fgpu-exclude-wrong-side-overloads");
CmdArgs.push_back("-fgpu-defer-diag");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fcf_protection_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
}
// Forward -f options with positive and negative forms; we translate these by
// hand. Do not propagate PGO options to the GPU-side compilations as the
// profile info is for the host-side compilation only.
if (!(IsCudaDevice || IsHIPDevice)) {
if (Arg *A = getLastProfileSampleUseArg(Args)) {
auto *PGOArg = Args.getLastArg(
options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
options::OPT_fprofile_use_EQ);
if (PGOArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "SampleUse with PGO options";
StringRef fname = A->getValue();
if (!llvm::sys::fs::exists(fname))
D.Diag(diag::err_drv_no_such_file) << fname;
else
A->render(Args, CmdArgs);
}
Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
options::OPT_fno_pseudo_probe_for_profiling, false)) {
CmdArgs.push_back("-fpseudo-probe-for-profiling");
// Enforce -funique-internal-linkage-names if it's not explicitly turned
// off.
if (Args.hasFlag(options::OPT_funique_internal_linkage_names,
options::OPT_fno_unique_internal_linkage_names, true))
CmdArgs.push_back("-funique-internal-linkage-names");
}
}
RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -fblocks=0 is default.
if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks,
TC.IsBlocksDefault()) ||
(Args.hasArg(options::OPT_fgnu_runtime) &&
Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
!Args.hasArg(options::OPT_fno_blocks))) {
CmdArgs.push_back("-fblocks");
if (!Args.hasArg(options::OPT_fgnu_runtime) && !TC.hasBlocksRuntime())
CmdArgs.push_back("-fblocks-runtime-optional");
}
// -fencode-extended-block-signature=1 is default.
if (TC.IsEncodeExtendedBlockSignatureDefault())
CmdArgs.push_back("-fencode-extended-block-signature");
if (Args.hasFlag(options::OPT_fcoroutines_ts, options::OPT_fno_coroutines_ts,
false) &&
types::isCXX(InputType)) {
CmdArgs.push_back("-fcoroutines-ts");
}
Args.AddLastArg(CmdArgs, options::OPT_fdouble_square_bracket_attributes,
options::OPT_fno_double_square_bracket_attributes);
// -faccess-control is default.
if (Args.hasFlag(options::OPT_fno_access_control,
options::OPT_faccess_control, false))
CmdArgs.push_back("-fno-access-control");
// -felide-constructors is the default.
if (Args.hasFlag(options::OPT_fno_elide_constructors,
options::OPT_felide_constructors, false))
CmdArgs.push_back("-fno-elide-constructors");
ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
if (KernelOrKext || (types::isCXX(InputType) &&
(RTTIMode == ToolChain::RM_Disabled)))
CmdArgs.push_back("-fno-rtti");
// -fshort-enums=0 is default for all architectures except Hexagon and z/OS.
if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums,
TC.getArch() == llvm::Triple::hexagon || Triple.isOSzOS()))
CmdArgs.push_back("-fshort-enums");
RenderCharacterOptions(Args, AuxTriple ? *AuxTriple : RawTriple, CmdArgs);
// -fuse-cxa-atexit is default.
if (!Args.hasFlag(
options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
!RawTriple.isOSAIX() && !RawTriple.isOSWindows() &&
TC.getArch() != llvm::Triple::xcore &&
((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) ||
RawTriple.hasEnvironment())) ||
KernelOrKext)
CmdArgs.push_back("-fno-use-cxa-atexit");
if (Args.hasFlag(options::OPT_fregister_global_dtors_with_atexit,
options::OPT_fno_register_global_dtors_with_atexit,
RawTriple.isOSDarwin() && !KernelOrKext))
CmdArgs.push_back("-fregister-global-dtors-with-atexit");
// -fno-use-line-directives is default.
if (Args.hasFlag(options::OPT_fuse_line_directives,
options::OPT_fno_use_line_directives, false))
CmdArgs.push_back("-fuse-line-directives");
// -fms-extensions=0 is default.
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
IsWindowsMSVC))
CmdArgs.push_back("-fms-extensions");
// -fms-compatibility=0 is default.
bool IsMSVCCompat = Args.hasFlag(
options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility,
(IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions,
options::OPT_fno_ms_extensions, true)));
if (IsMSVCCompat)
CmdArgs.push_back("-fms-compatibility");
// Handle -fgnuc-version, if present.
VersionTuple GNUCVer;
if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) {
// Check that the version has 1 to 3 components and the minor and patch
// versions fit in two decimal digits.
StringRef Val = A->getValue();
Val = Val.empty() ? "0" : Val; // Treat "" as 0 or disable.
bool Invalid = GNUCVer.tryParse(Val);
unsigned Minor = GNUCVer.getMinor().getValueOr(0);
unsigned Patch = GNUCVer.getSubminor().getValueOr(0);
if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) {
D.Diag(diag::err_drv_invalid_value)
<< A->getAsString(Args) << A->getValue();
}
} else if (!IsMSVCCompat) {
// Imitate GCC 4.2.1 by default if -fms-compatibility is not in effect.
GNUCVer = VersionTuple(4, 2, 1);
}
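// A non-empty GNUCVer is forwarded as -fgnuc-version=<V>, which in turn
// drives the __GNUC__ / __GNUC_MINOR__ / __GNUC_PATCHLEVEL__ predefines in
// the frontend.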
if (!GNUCVer.empty()) {
CmdArgs.push_back(
Args.MakeArgString("-fgnuc-version=" + GNUCVer.getAsString()));
}
VersionTuple MSVT = TC.computeMSVCVersion(&D, Args);
if (!MSVT.empty())
CmdArgs.push_back(
Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString()));
bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
if (ImplyVCPPCVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c11", "-std=c11")
.Case("c17", "-std=c17")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
CmdArgs.push_back(LanguageStandard.data());
}
if (ImplyVCPPCXXVer) {
StringRef LanguageStandard;
if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
Std = StdArg;
LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
.Case("c++14", "-std=c++14")
.Case("c++17", "-std=c++17")
.Case("c++20", "-std=c++20")
.Case("c++latest", "-std=c++2b")
.Default("");
if (LanguageStandard.empty())
D.Diag(clang::diag::warn_drv_unused_argument)
<< StdArg->getAsString(Args);
}
if (LanguageStandard.empty()) {
if (IsMSVC2015Compatible)
LanguageStandard = "-std=c++14";
else
LanguageStandard = "-std=c++11";
}
CmdArgs.push_back(LanguageStandard.data());
}
// -fno-borland-extensions is default.
if (Args.hasFlag(options::OPT_fborland_extensions,
options::OPT_fno_borland_extensions, false))
CmdArgs.push_back("-fborland-extensions");
// -fno-declspec is default, except for PS4.
if (Args.hasFlag(options::OPT_fdeclspec, options::OPT_fno_declspec,
RawTriple.isPS4()))
CmdArgs.push_back("-fdeclspec");
else if (Args.hasArg(options::OPT_fno_declspec))
CmdArgs.push_back("-fno-declspec"); // Explicitly disabling __declspec.
// -fthreadsafe-statics is the default, except for MSVC compatibility versions
// less than 19.
if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics,
!IsWindowsMSVC || IsMSVC2015Compatible))
CmdArgs.push_back("-fno-threadsafe-statics");
// -fno-delayed-template-parsing is default, except when targeting MSVC.
// Many old Windows SDK versions require this to parse.
// FIXME: MSVC introduced /Zc:twoPhase- to disable this behavior in their
// compiler. We should be able to disable this by default at some point.
if (Args.hasFlag(options::OPT_fdelayed_template_parsing,
options::OPT_fno_delayed_template_parsing, IsWindowsMSVC))
CmdArgs.push_back("-fdelayed-template-parsing");
// -fgnu-keywords default varies depending on language; only pass if
// specified.
Args.AddLastArg(CmdArgs, options::OPT_fgnu_keywords,
options::OPT_fno_gnu_keywords);
if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline,
false))
CmdArgs.push_back("-fgnu89-inline");
if (Args.hasArg(options::OPT_fno_inline))
CmdArgs.push_back("-fno-inline");
Args.AddLastArg(CmdArgs, options::OPT_finline_functions,
options::OPT_finline_hint_functions,
options::OPT_fno_inline_functions);
// FIXME: Find a better way to determine whether the language has modules
// support by default, or just assume that all languages do.
bool HaveModules =
Std && (Std->containsValue("c++2a") || Std->containsValue("c++20") ||
Std->containsValue("c++latest"));
RenderModulesOptions(C, D, Args, Input, Output, CmdArgs, HaveModules);
if (Args.hasFlag(options::OPT_fpch_validate_input_files_content,
options::OPT_fno_pch_validate_input_files_content, false))
CmdArgs.push_back("-fvalidate-ast-input-files-content");
if (Args.hasFlag(options::OPT_fpch_instantiate_templates,
options::OPT_fno_pch_instantiate_templates, false))
CmdArgs.push_back("-fpch-instantiate-templates");
if (Args.hasFlag(options::OPT_fpch_codegen, options::OPT_fno_pch_codegen,
false))
CmdArgs.push_back("-fmodules-codegen");
if (Args.hasFlag(options::OPT_fpch_debuginfo, options::OPT_fno_pch_debuginfo,
false))
CmdArgs.push_back("-fmodules-debuginfo");
Args.AddLastArg(CmdArgs, options::OPT_flegacy_pass_manager,
options::OPT_fno_legacy_pass_manager);
ObjCRuntime Runtime = AddObjCRuntimeArgs(Args, Inputs, CmdArgs, rewriteKind);
RenderObjCOptions(TC, D, RawTriple, Args, Runtime, rewriteKind != RK_None,
Input, CmdArgs);
if (types::isObjC(Input.getType()) &&
Args.hasFlag(options::OPT_fobjc_encode_cxx_class_template_spec,
options::OPT_fno_objc_encode_cxx_class_template_spec,
!Runtime.isNeXTFamily()))
CmdArgs.push_back("-fobjc-encode-cxx-class-template-spec");
if (Args.hasFlag(options::OPT_fapplication_extension,
options::OPT_fno_application_extension, false))
CmdArgs.push_back("-fapplication-extension");
// Handle GCC-style exception args.
bool EH = false;
if (!C.getDriver().IsCLMode())
EH = addExceptionArgs(Args, InputType, TC, KernelOrKext, Runtime, CmdArgs);
// Handle exception personalities
Arg *A = Args.getLastArg(
options::OPT_fsjlj_exceptions, options::OPT_fseh_exceptions,
options::OPT_fdwarf_exceptions, options::OPT_fwasm_exceptions);
if (A) {
const Option &Opt = A->getOption();
if (Opt.matches(options::OPT_fsjlj_exceptions))
CmdArgs.push_back("-exception-model=sjlj");
if (Opt.matches(options::OPT_fseh_exceptions))
CmdArgs.push_back("-exception-model=seh");
if (Opt.matches(options::OPT_fdwarf_exceptions))
CmdArgs.push_back("-exception-model=dwarf");
if (Opt.matches(options::OPT_fwasm_exceptions))
CmdArgs.push_back("-exception-model=wasm");
} else {
switch (TC.GetExceptionModel(Args)) {
default:
break;
case llvm::ExceptionHandling::DwarfCFI:
CmdArgs.push_back("-exception-model=dwarf");
break;
case llvm::ExceptionHandling::SjLj:
CmdArgs.push_back("-exception-model=sjlj");
break;
case llvm::ExceptionHandling::WinEH:
CmdArgs.push_back("-exception-model=seh");
break;
}
}
// C++ "sane" operator new.
if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,
options::OPT_fno_assume_sane_operator_new))
CmdArgs.push_back("-fno-assume-sane-operator-new");
// -frelaxed-template-template-args is off by default, as it is a severe
// breaking change until a corresponding change to template partial ordering
// is provided.
if (Args.hasFlag(options::OPT_frelaxed_template_template_args,
options::OPT_fno_relaxed_template_template_args, false))
CmdArgs.push_back("-frelaxed-template-template-args");
// -fsized-deallocation is off by default, as it is an ABI-breaking change for
// most platforms.
if (Args.hasFlag(options::OPT_fsized_deallocation,
options::OPT_fno_sized_deallocation, false))
CmdArgs.push_back("-fsized-deallocation");
// -faligned-allocation is on by default in C++17 onwards and otherwise off
// by default.
if (Arg *A = Args.getLastArg(options::OPT_faligned_allocation,
options::OPT_fno_aligned_allocation,
options::OPT_faligned_new_EQ)) {
if (A->getOption().matches(options::OPT_fno_aligned_allocation))
CmdArgs.push_back("-fno-aligned-allocation");
else
CmdArgs.push_back("-faligned-allocation");
}
// The default new alignment can be specified using a dedicated option or via
// a GCC-compatible option that also turns on aligned allocation.
if (Arg *A = Args.getLastArg(options::OPT_fnew_alignment_EQ,
options::OPT_faligned_new_EQ))
CmdArgs.push_back(
Args.MakeArgString(Twine("-fnew-alignment=") + A->getValue()));
// -fconstant-cfstrings is default, and may be subject to argument translation
// on Darwin.
if (!Args.hasFlag(options::OPT_fconstant_cfstrings,
options::OPT_fno_constant_cfstrings) ||
!Args.hasFlag(options::OPT_mconstant_cfstrings,
options::OPT_mno_constant_cfstrings))
CmdArgs.push_back("-fno-constant-cfstrings");
// -fno-pascal-strings is default, only pass non-default.
if (Args.hasFlag(options::OPT_fpascal_strings,
options::OPT_fno_pascal_strings, false))
CmdArgs.push_back("-fpascal-strings");
// Honor -fpack-struct= and -fpack-struct, if given. Note that
// -fno-pack-struct doesn't apply to -fpack-struct=.
if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) {
std::string PackStructStr = "-fpack-struct=";
PackStructStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(PackStructStr));
} else if (Args.hasFlag(options::OPT_fpack_struct,
options::OPT_fno_pack_struct, false)) {
CmdArgs.push_back("-fpack-struct=1");
}
// Handle -fmax-type-align=N and -fno-type-align
bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align);
if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=";
MaxTypeAlignStr += A->getValue();
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
} else if (RawTriple.isOSDarwin()) {
if (!SkipMaxTypeAlign) {
std::string MaxTypeAlignStr = "-fmax-type-align=16";
CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr));
}
}
if (!Args.hasFlag(options::OPT_Qy, options::OPT_Qn, true))
CmdArgs.push_back("-Qn");
// -fno-common is the default, set -fcommon only when that flag is set.
if (Args.hasFlag(options::OPT_fcommon, options::OPT_fno_common, false))
CmdArgs.push_back("-fcommon");
// -fsigned-bitfields is default, and clang doesn't yet support
// -funsigned-bitfields.
if (!Args.hasFlag(options::OPT_fsigned_bitfields,
options::OPT_funsigned_bitfields))
D.Diag(diag::warn_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
// -ffor-scope is the default, and clang doesn't support -fno-for-scope.
if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope))
D.Diag(diag::err_drv_clang_unsupported)
<< Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
// -finput-charset=UTF-8 is the default; reject anything else.
if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
StringRef value = inputCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
<< value;
}
// -fexec-charset=UTF-8 is the default; reject anything else.
if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
StringRef value = execCharset->getValue();
if (!value.equals_insensitive("utf-8"))
D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
<< value;
}
RenderDiagnosticsOptions(D, Args, CmdArgs);
// -fno-asm-blocks is default.
if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks,
false))
CmdArgs.push_back("-fasm-blocks");
// -fgnu-inline-asm is default.
if (!Args.hasFlag(options::OPT_fgnu_inline_asm,
options::OPT_fno_gnu_inline_asm, true))
CmdArgs.push_back("-fno-gnu-inline-asm");
// Enable vectorization per default according to the optimization level
// selected. For optimization levels that want vectorization we use the alias
// option to simplify the hasFlag logic.
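// When the -O level already enables vectorization, any -O flag acts as the
// positive option here, so e.g. `-fno-vectorize -O2` still enables the loop
// vectorizer (the last relevant flag wins).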
bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false);
OptSpecifier VectorizeAliasOption =
EnableVec ? options::OPT_O_Group : options::OPT_fvectorize;
if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
options::OPT_fno_vectorize, EnableVec))
CmdArgs.push_back("-vectorize-loops");
// -fslp-vectorize is enabled based on the optimization level selected.
bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true);
OptSpecifier SLPVectAliasOption =
EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,
options::OPT_fno_slp_vectorize, EnableSLPVec))
CmdArgs.push_back("-vectorize-slp");
ParseMPreferVectorWidth(D, Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fshow_overloads_EQ);
Args.AddLastArg(CmdArgs,
options::OPT_fsanitize_undefined_strip_path_components_EQ);
// -fdollars-in-identifiers default varies depending on platform and
// language; only pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers,
options::OPT_fno_dollars_in_identifiers)) {
if (A->getOption().matches(options::OPT_fdollars_in_identifiers))
CmdArgs.push_back("-fdollars-in-identifiers");
else
CmdArgs.push_back("-fno-dollars-in-identifiers");
}
// -funit-at-a-time is default, and we don't support -fno-unit-at-a-time for
// practical purposes.
if (Arg *A = Args.getLastArg(options::OPT_funit_at_a_time,
options::OPT_fno_unit_at_a_time)) {
if (A->getOption().matches(options::OPT_fno_unit_at_a_time))
D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args);
}
if (Args.hasFlag(options::OPT_fapple_pragma_pack,
options::OPT_fno_apple_pragma_pack, false))
CmdArgs.push_back("-fapple-pragma-pack");
if (Args.hasFlag(options::OPT_fxl_pragma_pack,
options::OPT_fno_xl_pragma_pack, RawTriple.isOSAIX()))
CmdArgs.push_back("-fxl-pragma-pack");
// Remarks can be enabled with any of the `-f.*optimization-record.*` flags.
if (willEmitRemarks(Args) && checkRemarksOptions(D, Args, Triple))
renderRemarksOptions(Args, CmdArgs, Triple, Input, Output, JA);
bool RewriteImports = Args.hasFlag(options::OPT_frewrite_imports,
options::OPT_fno_rewrite_imports, false);
if (RewriteImports)
CmdArgs.push_back("-frewrite-imports");
// Enable rewrite includes if the user's asked for it or if we're generating
// diagnostics.
// TODO: Once -module-dependency-dir works with -frewrite-includes it'd be
// nice to enable this when doing a crashdump for modules as well.
if (Args.hasFlag(options::OPT_frewrite_includes,
options::OPT_fno_rewrite_includes, false) ||
(C.isForDiagnostics() && !HaveModules))
CmdArgs.push_back("-frewrite-includes");
// Only allow -traditional or -traditional-cpp in preprocessing modes.
if (Arg *A = Args.getLastArg(options::OPT_traditional,
options::OPT_traditional_cpp)) {
if (isa<PreprocessJobAction>(JA))
CmdArgs.push_back("-traditional-cpp");
else
D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
}
Args.AddLastArg(CmdArgs, options::OPT_dM);
Args.AddLastArg(CmdArgs, options::OPT_dD);
Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens_EQ);
// Handle serialized diagnostics.
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
CmdArgs.push_back("-serialize-diagnostic-file");
CmdArgs.push_back(Args.MakeArgString(A->getValue()));
}
if (Args.hasArg(options::OPT_fretain_comments_from_system_headers))
CmdArgs.push_back("-fretain-comments-from-system-headers");
// Forward -fcomment-block-commands to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fcomment_block_commands);
// Forward -fparse-all-comments to -cc1.
Args.AddAllArgs(CmdArgs, options::OPT_fparse_all_comments);
// Turn -fplugin=name.so into -load name.so
for (const Arg *A : Args.filtered(options::OPT_fplugin_EQ)) {
CmdArgs.push_back("-load");
CmdArgs.push_back(A->getValue());
A->claim();
}
// Forward -fpass-plugin=name.so to -cc1.
for (const Arg *A : Args.filtered(options::OPT_fpass_plugin_EQ)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-fpass-plugin=") + A->getValue()));
A->claim();
}
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-stats-file=") + StatsFile));
// Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option
// parser.
// The -finclude-default-header flag is for the preprocessor; do not pass it
// to other cc1 commands when save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled() &&
!isa<PreprocessJobAction>(JA)) {
for (auto Arg : Args.filtered(options::OPT_Xclang)) {
Arg->claim();
if (StringRef(Arg->getValue()) != "-finclude-default-header")
CmdArgs.push_back(Arg->getValue());
}
}
else {
Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
}
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
A->claim();
// We translate this by hand to the -cc1 argument, since nightly test uses
// it and developers have been trained to spell it with -mllvm. Both
// spellings are now deprecated and should be removed.
if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") {
CmdArgs.push_back("-disable-llvm-optzns");
} else {
A->render(Args, CmdArgs);
}
}
// With -save-temps, we want to save the unoptimized bitcode output from the
// CompileJobAction, use -disable-llvm-passes to get pristine IR generated
// by the frontend.
// When -fembed-bitcode is enabled, optimized bitcode is emitted because it
// has slightly different breakdown between stages.
// FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
// pristine IR generated by the frontend. Ideally, a new compile action should
// be added so both IR can be captured.
if ((C.getDriver().isSaveTempsEnabled() ||
JA.isHostOffloading(Action::OFK_OpenMP)) &&
!(C.getDriver().embedBitcodeInObject() && !IsUsingLTO) &&
isa<CompileJobAction>(JA))
CmdArgs.push_back("-disable-llvm-passes");
Args.AddAllArgs(CmdArgs, options::OPT_undef);
const char *Exec = D.getClangProgramPath();
// Optionally embed the -cc1 level arguments into the debug info or a
// section, for build analysis.
// Also record the command line arguments into the debug info if
// -grecord-gcc-switches is set; by default -gno-record-gcc-switches is in
// effect and nothing is recorded.
auto GRecordSwitches =
Args.hasFlag(options::OPT_grecord_command_line,
options::OPT_gno_record_command_line, false);
auto FRecordSwitches =
Args.hasFlag(options::OPT_frecord_command_line,
options::OPT_fno_record_command_line, false);
if (FRecordSwitches && !Triple.isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_frecord_command_line)->getAsString(Args)
<< TripleStr;
if (TC.UseDwarfDebugFlags() || GRecordSwitches || FRecordSwitches) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
auto FlagsArgString = Args.MakeArgString(Flags);
if (TC.UseDwarfDebugFlags() || GRecordSwitches) {
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(FlagsArgString);
}
if (FRecordSwitches) {
CmdArgs.push_back("-record-command-line");
CmdArgs.push_back(FlagsArgString);
}
}
// Host-side cuda compilation receives all device-side outputs in a single
// fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
if ((IsCuda || IsHIP) && CudaDeviceInput) {
CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());
if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
CmdArgs.push_back("-fgpu-rdc");
}
if (IsCuda) {
if (Args.hasFlag(options::OPT_fcuda_short_ptr,
options::OPT_fno_cuda_short_ptr, false))
CmdArgs.push_back("-fcuda-short-ptr");
}
if (IsCuda || IsHIP) {
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
auto CUID = cast<InputAction>(SourceAction)->getId();
if (!CUID.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
}
if (IsHIP)
CmdArgs.push_back("-fcuda-allow-variadic-functions");
if (IsCudaDevice || IsHIPDevice) {
StringRef InlineThresh =
Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
if (!InlineThresh.empty()) {
std::string ArgStr =
std::string("-inline-threshold=") + InlineThresh.str();
CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
}
}
// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
// to specify the result of the compile phase on the host, so the meaningful
// device declarations can be identified. Also, -fopenmp-is-device is passed
// along to tell the frontend that it is generating code for a device, so that
// only the relevant declarations are emitted.
if (IsOpenMPDevice) {
CmdArgs.push_back("-fopenmp-is-device");
if (OpenMPDeviceInput) {
CmdArgs.push_back("-fopenmp-host-ir-file-path");
CmdArgs.push_back(Args.MakeArgString(OpenMPDeviceInput->getFilename()));
}
}
if (Triple.isAMDGPU()) {
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
if (Args.hasFlag(options::OPT_munsafe_fp_atomics,
options::OPT_mno_unsafe_fp_atomics, /*Default=*/false))
CmdArgs.push_back("-munsafe-fp-atomics");
}
// For all the host OpenMP offloading compile jobs we need to pass the targets
// information using -fopenmp-targets= option.
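// e.g. -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa (illustrative
// target triples).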
if (JA.isHostOffloading(Action::OFK_OpenMP)) {
SmallString<128> TargetInfo("-fopenmp-targets=");
Arg *Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ);
assert(Tgts && Tgts->getNumValues() &&
"OpenMP offloading has to have targets specified.");
for (unsigned i = 0; i < Tgts->getNumValues(); ++i) {
if (i)
TargetInfo += ',';
// We need to get the string from the triple because it may not be exactly
// the same as the one we get directly from the arguments.
llvm::Triple T(Tgts->getValue(i));
TargetInfo += T.getTriple();
}
CmdArgs.push_back(Args.MakeArgString(TargetInfo.str()));
}
bool VirtualFunctionElimination =
Args.hasFlag(options::OPT_fvirtual_function_elimination,
options::OPT_fno_virtual_function_elimination, false);
if (VirtualFunctionElimination) {
// VFE requires full LTO (currently, this might be relaxed to allow ThinLTO
// in the future).
if (LTOMode != LTOK_Full)
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fvirtual-function-elimination"
<< "-flto=full";
CmdArgs.push_back("-fvirtual-function-elimination");
}
// VFE requires whole-program-vtables, and enables it by default.
bool WholeProgramVTables = Args.hasFlag(
options::OPT_fwhole_program_vtables,
options::OPT_fno_whole_program_vtables, VirtualFunctionElimination);
if (VirtualFunctionElimination && !WholeProgramVTables) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fno-whole-program-vtables"
<< "-fvirtual-function-elimination";
}
if (WholeProgramVTables) {
// Propagate -fwhole-program-vtables if this is an LTO compile.
if (IsUsingLTO)
CmdArgs.push_back("-fwhole-program-vtables");
// Check if we passed LTO options but they were suppressed because this is a
// device offloading action, or we passed device offload LTO options which
// were suppressed because this is not the device offload action.
// Otherwise, issue an error.
else if (!D.isUsingLTO(!IsDeviceOffloadAction))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fwhole-program-vtables"
<< "-flto";
}
bool DefaultsSplitLTOUnit =
(WholeProgramVTables || Sanitize.needsLTO()) &&
(LTOMode == LTOK_Full || TC.canSplitThinLTOUnit());
bool SplitLTOUnit =
Args.hasFlag(options::OPT_fsplit_lto_unit,
options::OPT_fno_split_lto_unit, DefaultsSplitLTOUnit);
if (Sanitize.needsLTO() && !SplitLTOUnit)
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fno-split-lto-unit"
<< "-fsanitize=cfi";
if (SplitLTOUnit)
CmdArgs.push_back("-fsplit-lto-unit");
if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel,
options::OPT_fno_global_isel)) {
CmdArgs.push_back("-mllvm");
if (A->getOption().matches(options::OPT_fglobal_isel)) {
CmdArgs.push_back("-global-isel=1");
// GISel is on by default on AArch64 -O0, so don't bother adding
// the fallback remarks for it. Other combinations will add a warning of
// some kind.
bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64;
bool IsOptLevelSupported = false;
Arg *A = Args.getLastArg(options::OPT_O_Group);
if (Triple.getArch() == llvm::Triple::aarch64) {
if (!A || A->getOption().matches(options::OPT_O0))
IsOptLevelSupported = true;
}
if (!IsArchSupported || !IsOptLevelSupported) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-global-isel-abort=2");
if (!IsArchSupported)
D.Diag(diag::warn_drv_global_isel_incomplete) << Triple.getArchName();
else
D.Diag(diag::warn_drv_global_isel_incomplete_opt);
}
} else {
CmdArgs.push_back("-global-isel=0");
}
}
if (Args.hasArg(options::OPT_forder_file_instrumentation)) {
CmdArgs.push_back("-forder-file-instrumentation");
// Enable order file instrumentation when ThinLTO is not on. When ThinLTO is
// on, we need to pass these flags as linker flags and that will be handled
// outside of the compiler.
if (!IsUsingLTO) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-enable-order-file-instrumentation");
}
}
if (Arg *A = Args.getLastArg(options::OPT_fforce_enable_int128,
options::OPT_fno_force_enable_int128)) {
if (A->getOption().matches(options::OPT_fforce_enable_int128))
CmdArgs.push_back("-fforce-enable-int128");
}
if (Args.hasFlag(options::OPT_fkeep_static_consts,
options::OPT_fno_keep_static_consts, false))
CmdArgs.push_back("-fkeep-static-consts");
if (Args.hasFlag(options::OPT_fcomplete_member_pointers,
options::OPT_fno_complete_member_pointers, false))
CmdArgs.push_back("-fcomplete-member-pointers");
if (!Args.hasFlag(options::OPT_fcxx_static_destructors,
options::OPT_fno_cxx_static_destructors, true))
CmdArgs.push_back("-fno-c++-static-destructors");
addMachineOutlinerArgs(D, Args, CmdArgs, Triple, /*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics,
options::OPT_mno_outline_atomics)) {
if (A->getOption().matches(options::OPT_moutline_atomics)) {
// Option -moutline-atomics supported for AArch64 target only.
if (!Triple.isAArch64()) {
D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt)
<< Triple.getArchName();
} else {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
}
} else {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("-outline-atomics");
}
} else if (Triple.isAArch64() &&
getToolChain().IsAArch64OutlineAtomicsDefault(Args)) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+outline-atomics");
}
if (Args.hasFlag(options::OPT_faddrsig, options::OPT_fno_addrsig,
(TC.getTriple().isOSBinFormatELF() ||
TC.getTriple().isOSBinFormatCOFF()) &&
!TC.getTriple().isPS4() && !TC.getTriple().isVE() &&
!TC.getTriple().isOSNetBSD() &&
!Distro(D.getVFS(), TC.getTriple()).IsGentoo() &&
!TC.getTriple().isAndroid() && TC.useIntegratedAs()))
CmdArgs.push_back("-faddrsig");
if ((Triple.isOSBinFormatELF() || Triple.isOSBinFormatMachO()) &&
(EH || UnwindTables || DebugInfoKind != codegenoptions::NoDebugInfo))
CmdArgs.push_back("-D__GCC_HAVE_DWARF2_CFI_ASM=1");
if (Arg *A = Args.getLastArg(options::OPT_fsymbol_partition_EQ)) {
std::string Str = A->getAsString(Args);
if (!TC.getTriple().isOSBinFormatELF())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Str << TC.getTripleString();
CmdArgs.push_back(Args.MakeArgString(Str));
}
// Add the "-o out -x type src.c" flags last. This is done primarily to make
// the -cc1 command easier to edit when reproducing compiler crashes.
if (Output.getType() == types::TY_Dependencies) {
// Handled with other dependency code.
} else if (Output.isFilename()) {
if (Output.getType() == clang::driver::types::TY_IFS_CPP ||
Output.getType() == clang::driver::types::TY_IFS) {
SmallString<128> OutputFilename(Output.getFilename());
llvm::sys::path::replace_extension(OutputFilename, "ifs");
CmdArgs.push_back("-o");
CmdArgs.push_back(Args.MakeArgString(OutputFilename));
} else {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
}
} else {
assert(Output.isNothing() && "Invalid output.");
}
addDashXForInput(Args, Input, CmdArgs);
ArrayRef<InputInfo> FrontendInputs = Input;
if (IsHeaderModulePrecompile)
FrontendInputs = ModuleHeaderInputs;
else if (Input.isNothing())
FrontendInputs = {};
for (const InputInfo &Input : FrontendInputs) {
if (Input.isFilename())
CmdArgs.push_back(Input.getFilename());
else
Input.getInputArg().renderAsInput(Args, CmdArgs);
}
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke the CC1 directly in this process
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
// Make the compile command echo its inputs for /showFilenames.
if (Output.getType() == types::TY_Object &&
Args.hasFlag(options::OPT__SLASH_showFilenames,
options::OPT__SLASH_showFilenames_, false)) {
C.getJobs().getJobs().back()->PrintInputFilenames = true;
}
if (Arg *A = Args.getLastArg(options::OPT_pg))
if (FPKeepKind == CodeGenOptions::FramePointerKind::None &&
!Args.hasArg(options::OPT_mfentry))
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
<< A->getAsString(Args);
// Claim some arguments which clang supports automatically.
// -fpch-preprocess is used with gcc to add a special marker in the output to
// include the PCH file.
Args.ClaimAllArgs(options::OPT_fpch_preprocess);
// Claim some arguments which clang doesn't support, but we don't
// care to warn the user about.
Args.ClaimAllArgs(options::OPT_clang_ignored_f_Group);
Args.ClaimAllArgs(options::OPT_clang_ignored_m_Group);
// Disable warnings for clang -E -emit-llvm foo.c
Args.ClaimAllArgs(options::OPT_emit_llvm);
}
Clang::Clang(const ToolChain &TC)
// CAUTION! The first constructor argument ("clang") is not arbitrary,
// as it is for other tools. Some operations on a Tool actually test
// whether that tool is Clang based on the Tool's Name as a string.
: Tool("clang", "clang frontend", TC) {}
Clang::~Clang() {}
/// Add options related to the Objective-C runtime/ABI.
///
/// Returns the selected Objective-C runtime.
ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
const InputInfoList &inputs,
ArgStringList &cmdArgs,
RewriteKind rewriteKind) const {
// Look for the controlling runtime option.
Arg *runtimeArg =
args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime,
options::OPT_fobjc_runtime_EQ);
// Just forward -fobjc-runtime= to the frontend. This supersedes
// options about fragility.
if (runtimeArg &&
runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) {
ObjCRuntime runtime;
StringRef value = runtimeArg->getValue();
if (runtime.tryParse(value)) {
getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
<< value;
}
if ((runtime.getKind() == ObjCRuntime::GNUstep) &&
(runtime.getVersion() >= VersionTuple(2, 0)))
if (!getToolChain().getTriple().isOSBinFormatELF() &&
!getToolChain().getTriple().isOSBinFormatCOFF()) {
getToolChain().getDriver().Diag(
diag::err_drv_gnustep_objc_runtime_incompatible_binary)
<< runtime.getVersion().getMajor();
}
runtimeArg->render(args, cmdArgs);
return runtime;
}
// Otherwise, we'll need the ABI "version". Version numbers are
// slightly confusing for historical reasons:
// 1 - Traditional "fragile" ABI
// 2 - Non-fragile ABI, version 1
// 3 - Non-fragile ABI, version 2
unsigned objcABIVersion = 1;
// If -fobjc-abi-version= is present, use that to set the version.
if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
objcABIVersion = 1;
else if (value == "2")
objcABIVersion = 2;
else if (value == "3")
objcABIVersion = 3;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value;
} else {
// Otherwise, determine if we are using the non-fragile ABI.
bool nonFragileABIIsDefault =
(rewriteKind == RK_NonFragile ||
(rewriteKind == RK_None &&
getToolChain().IsObjCNonFragileABIDefault()));
if (args.hasFlag(options::OPT_fobjc_nonfragile_abi,
options::OPT_fno_objc_nonfragile_abi,
nonFragileABIIsDefault)) {
// Determine the non-fragile ABI version to use.
#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO
unsigned nonFragileABIVersion = 1;
#else
unsigned nonFragileABIVersion = 2;
#endif
if (Arg *abiArg =
args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) {
StringRef value = abiArg->getValue();
if (value == "1")
nonFragileABIVersion = 1;
else if (value == "2")
nonFragileABIVersion = 2;
else
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
<< value;
}
objcABIVersion = 1 + nonFragileABIVersion;
} else {
objcABIVersion = 1;
}
}
// We don't actually care about the ABI version other than whether
// it's non-fragile.
bool isNonFragile = objcABIVersion != 1;
// If we have no runtime argument, ask the toolchain for its default runtime.
// However, the rewriter only really supports the Mac runtime, so assume that.
ObjCRuntime runtime;
if (!runtimeArg) {
switch (rewriteKind) {
case RK_None:
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
break;
case RK_Fragile:
runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple());
break;
case RK_NonFragile:
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
break;
}
// -fnext-runtime
} else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) {
// On Darwin, make this use the default behavior for the toolchain.
if (getToolChain().getTriple().isOSDarwin()) {
runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
// Otherwise, build for a generic macosx port.
} else {
runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
}
// -fgnu-runtime
} else {
assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime));
// Legacy behaviour is to target the gnustep runtime if we are in
// non-fragile mode or the GCC runtime in fragile mode.
if (isNonFragile)
runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(2, 0));
else
runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple());
}
if (llvm::any_of(inputs, [](const InputInfo &input) {
return types::isObjC(input.getType());
}))
cmdArgs.push_back(
args.MakeArgString("-fobjc-runtime=" + runtime.getAsString()));
return runtime;
}
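// Helper for parsing /EH values: returns true if the modifier at EH[I] is
// enabled, i.e. it is not followed by a '-'; a trailing '-' is consumed by
// advancing I.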
static bool maybeConsumeDash(const std::string &EH, size_t &I) {
bool HaveDash = (I + 1 < EH.size() && EH[I + 1] == '-');
I += HaveDash;
return !HaveDash;
}
namespace {
struct EHFlags {
bool Synch = false;
bool Asynch = false;
bool NoUnwindC = false;
};
} // end anonymous namespace
/// /EH controls whether to run destructor cleanups when exceptions are
/// thrown. There are three modifiers:
/// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions.
/// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions.
/// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR.
/// - c: Assume that extern "C" functions are implicitly nounwind.
/// The default is /EHs-c-, meaning cleanups are disabled.
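/// For example, /EHsc sets Synch and NoUnwindC, /EHa sets Asynch (clearing
/// Synch), and /EHs-c- leaves all three flags false.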
static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
EHFlags EH;
std::vector<std::string> EHArgs =
Args.getAllArgValues(options::OPT__SLASH_EH);
for (auto EHVal : EHArgs) {
for (size_t I = 0, E = EHVal.size(); I != E; ++I) {
switch (EHVal[I]) {
case 'a':
EH.Asynch = maybeConsumeDash(EHVal, I);
if (EH.Asynch)
EH.Synch = false;
continue;
case 'c':
EH.NoUnwindC = maybeConsumeDash(EHVal, I);
continue;
case 's':
EH.Synch = maybeConsumeDash(EHVal, I);
if (EH.Synch)
EH.Asynch = false;
continue;
default:
break;
}
D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal;
break;
}
}
// The /GX and /GX- flags are only processed if there are no /EH flags.
// The default is that /GX is not specified.
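// (/GX is MSVC's legacy spelling of /EHsc, hence Synch + NoUnwindC here.)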
if (EHArgs.empty() &&
Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
/*Default=*/false)) {
EH.Synch = true;
EH.NoUnwindC = true;
}
return EH;
}
void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind *DebugInfoKind,
bool *EmitCodeView) const {
unsigned RTOptionID = options::OPT__SLASH_MT;
bool isNVPTX = getToolChain().getTriple().isNVPTX();
if (Args.hasArg(options::OPT__SLASH_LDd))
// The /LDd option implies /MTd. The dependent lib part can be overridden,
// but defining _DEBUG is sticky.
RTOptionID = options::OPT__SLASH_MTd;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
RTOptionID = A->getOption().getID();
StringRef FlagForCRT;
switch (RTOptionID) {
case options::OPT__SLASH_MD:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrt";
break;
case options::OPT__SLASH_MDd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-D_DLL");
FlagForCRT = "--dependent-lib=msvcrtd";
break;
case options::OPT__SLASH_MT:
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmt";
break;
case options::OPT__SLASH_MTd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmtd";
break;
default:
llvm_unreachable("Unexpected option ID.");
}
if (Args.hasArg(options::OPT__SLASH_Zl)) {
CmdArgs.push_back("-D_VC_NODEFAULTLIB");
} else {
CmdArgs.push_back(FlagForCRT.data());
// This provides POSIX compatibility (maps 'open' to '_open'), which most
// users want. The /Za flag to cl.exe turns this off, but it's not
// implemented in clang.
CmdArgs.push_back("--dependent-lib=oldnames");
}
if (Arg *ShowIncludes =
Args.getLastArg(options::OPT__SLASH_showIncludes,
options::OPT__SLASH_showIncludes_user)) {
CmdArgs.push_back("--show-includes");
if (ShowIncludes->getOption().matches(options::OPT__SLASH_showIncludes))
CmdArgs.push_back("-sys-header-deps");
}
// This controls whether or not we emit RTTI data for polymorphic types.
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*Default=*/false))
CmdArgs.push_back("-fno-rtti-data");
// This controls whether or not we emit stack-protector instrumentation.
// In MSVC, Buffer Security Check (/GS) is on by default.
if (!isNVPTX && Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
/*Default=*/true)) {
CmdArgs.push_back("-stack-protector");
CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
}
// Emit CodeView if -Z7 or -gline-tables-only are present.
if (Arg *DebugInfoArg = Args.getLastArg(options::OPT__SLASH_Z7,
options::OPT_gline_tables_only)) {
*EmitCodeView = true;
if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7))
*DebugInfoKind = codegenoptions::DebugInfoConstructor;
else
*DebugInfoKind = codegenoptions::DebugLineTablesOnly;
} else {
*EmitCodeView = false;
}
const Driver &D = getToolChain().getDriver();
EHFlags EH = parseClangCLEHFlags(D, Args);
if (!isNVPTX && (EH.Synch || EH.Asynch)) {
if (types::isCXX(InputType))
CmdArgs.push_back("-fcxx-exceptions");
CmdArgs.push_back("-fexceptions");
}
if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC)
CmdArgs.push_back("-fexternc-nounwind");
// /EP should expand to -E -P.
if (Args.hasArg(options::OPT__SLASH_EP)) {
CmdArgs.push_back("-E");
CmdArgs.push_back("-P");
}
unsigned VolatileOptionID;
if (getToolChain().getTriple().isX86())
VolatileOptionID = options::OPT__SLASH_volatile_ms;
else
VolatileOptionID = options::OPT__SLASH_volatile_iso;
if (Arg *A = Args.getLastArg(options::OPT__SLASH_volatile_Group))
VolatileOptionID = A->getOption().getID();
if (VolatileOptionID == options::OPT__SLASH_volatile_ms)
CmdArgs.push_back("-fms-volatile");
if (Args.hasFlag(options::OPT__SLASH_Zc_dllexportInlines_,
options::OPT__SLASH_Zc_dllexportInlines,
false)) {
CmdArgs.push_back("-fno-dllexport-inlines");
}
Arg *MostGeneralArg = Args.getLastArg(options::OPT__SLASH_vmg);
Arg *BestCaseArg = Args.getLastArg(options::OPT__SLASH_vmb);
if (MostGeneralArg && BestCaseArg)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< MostGeneralArg->getAsString(Args) << BestCaseArg->getAsString(Args);
if (MostGeneralArg) {
Arg *SingleArg = Args.getLastArg(options::OPT__SLASH_vms);
Arg *MultipleArg = Args.getLastArg(options::OPT__SLASH_vmm);
Arg *VirtualArg = Args.getLastArg(options::OPT__SLASH_vmv);
Arg *FirstConflict = SingleArg ? SingleArg : MultipleArg;
Arg *SecondConflict = VirtualArg ? VirtualArg : MultipleArg;
if (FirstConflict && SecondConflict && FirstConflict != SecondConflict)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< FirstConflict->getAsString(Args)
<< SecondConflict->getAsString(Args);
if (SingleArg)
CmdArgs.push_back("-fms-memptr-rep=single");
else if (MultipleArg)
CmdArgs.push_back("-fms-memptr-rep=multiple");
else
CmdArgs.push_back("-fms-memptr-rep=virtual");
}
// Parse the default calling convention options.
if (Arg *CCArg =
Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
options::OPT__SLASH_Gz, options::OPT__SLASH_Gv,
options::OPT__SLASH_Gregcall)) {
unsigned DCCOptId = CCArg->getOption().getID();
const char *DCCFlag = nullptr;
bool ArchSupported = !isNVPTX;
llvm::Triple::ArchType Arch = getToolChain().getArch();
switch (DCCOptId) {
case options::OPT__SLASH_Gd:
DCCFlag = "-fdefault-calling-conv=cdecl";
break;
case options::OPT__SLASH_Gr:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=fastcall";
break;
case options::OPT__SLASH_Gz:
ArchSupported = Arch == llvm::Triple::x86;
DCCFlag = "-fdefault-calling-conv=stdcall";
break;
case options::OPT__SLASH_Gv:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=vectorcall";
break;
case options::OPT__SLASH_Gregcall:
ArchSupported = Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64;
DCCFlag = "-fdefault-calling-conv=regcall";
break;
}
// MSVC doesn't warn if /Gr or /Gz is used on x64, so we don't either.
if (ArchSupported && DCCFlag)
CmdArgs.push_back(DCCFlag);
}
Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ);
if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
CmdArgs.push_back("-fdiagnostics-format");
CmdArgs.push_back("msvc");
}
if (Arg *A = Args.getLastArg(options::OPT__SLASH_guard)) {
StringRef GuardArgs = A->getValue();
// The only valid options are "cf", "cf,nochecks", "cf-", "ehcont" and
// "ehcont-".
if (GuardArgs.equals_insensitive("cf")) {
// Emit CFG instrumentation and the table of address-taken functions.
CmdArgs.push_back("-cfguard");
} else if (GuardArgs.equals_insensitive("cf,nochecks")) {
// Emit only the table of address-taken functions.
CmdArgs.push_back("-cfguard-no-checks");
} else if (GuardArgs.equals_insensitive("ehcont")) {
// Emit EH continuation table.
CmdArgs.push_back("-ehcontguard");
} else if (GuardArgs.equals_insensitive("cf-") ||
GuardArgs.equals_insensitive("ehcont-")) {
// Do nothing, but we might want to emit a security warning in future.
} else {
D.Diag(diag::err_drv_invalid_value) << A->getSpelling() << GuardArgs;
}
}
}
const char *Clang::getBaseInputName(const ArgList &Args,
const InputInfo &Input) {
return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
}
const char *Clang::getBaseInputStem(const ArgList &Args,
const InputInfoList &Inputs) {
const char *Str = getBaseInputName(Args, Inputs[0]);
if (const char *End = strrchr(Str, '.'))
return Args.MakeArgString(std::string(Str, End));
return Str;
}
const char *Clang::getDependencyFileName(const ArgList &Args,
const InputInfoList &Inputs) {
// FIXME: Think about this more.
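// For example, `-o build/foo.o` yields build/foo.d here; without -o the name
// is derived from the input stem instead (foo.c -> foo.d).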
if (Arg *OutputOpt = Args.getLastArg(options::OPT_o)) {
SmallString<128> OutputFilename(OutputOpt->getValue());
llvm::sys::path::replace_extension(OutputFilename, llvm::Twine('d'));
return Args.MakeArgString(OutputFilename);
}
return Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".d");
}
// Begin ClangAs
void ClangAs::AddMIPSTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
StringRef CPUName;
StringRef ABIName;
const llvm::Triple &Triple = getToolChain().getTriple();
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::AddX86TargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
addX86AlignBranchArgs(getToolChain().getDriver(), Args, CmdArgs,
/*IsLTO=*/false);
if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) {
StringRef Value = A->getValue();
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
} else {
getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
void ClangAs::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
StringRef ABIName = riscv::getRISCVABI(Args, Triple);
CmdArgs.push_back("-target-abi");
CmdArgs.push_back(ABIName.data());
}
void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
const InputInfo &Input = Inputs[0];
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
const std::string &TripleStr = Triple.getTriple();
const auto &D = getToolChain().getDriver();
// Don't warn about "clang -w -c foo.s"
Args.ClaimAllArgs(options::OPT_w);
// and "clang -emit-llvm -c foo.s"
Args.ClaimAllArgs(options::OPT_emit_llvm);
claimNoWarnArgs(Args);
// Invoke ourselves in -cc1as mode.
//
// FIXME: Implement custom jobs for internal actions.
CmdArgs.push_back("-cc1as");
// Add the "effective" target triple.
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
// Set the output mode; we currently only expect to be used as a real
// assembler.
CmdArgs.push_back("-filetype");
CmdArgs.push_back("obj");
// Set the main file name, so that debug info works even with
// -save-temps or preprocessed assembly.
CmdArgs.push_back("-main-file-name");
CmdArgs.push_back(Clang::getBaseInputName(Args, Input));
// Add the target cpu
std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
}
// Add the target features
getTargetFeatures(D, Triple, Args, CmdArgs, true);
// Ignore explicit -force_cpusubtype_ALL option.
(void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
// Pass along any -I options so we get proper .include search paths.
Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
// Determine the original source input.
const Action *SourceAction = &JA;
while (SourceAction->getKind() != Action::InputClass) {
assert(!SourceAction->getInputs().empty() && "unexpected root action!");
SourceAction = SourceAction->getInputs()[0];
}
// Forward -g and handle debug info related flags, assuming we are dealing
// with an actual assembly file.
bool WantDebug = false;
Args.ClaimAllArgs(options::OPT_g_Group);
if (Arg *A = Args.getLastArg(options::OPT_g_Group))
WantDebug = !A->getOption().matches(options::OPT_g0) &&
!A->getOption().matches(options::OPT_ggdb0);
unsigned DwarfVersion = ParseDebugDefaultVersion(getToolChain(), Args);
if (const Arg *GDwarfN = getDwarfNArg(Args))
DwarfVersion = DwarfVersionNum(GDwarfN->getSpelling());
if (DwarfVersion == 0)
DwarfVersion = getToolChain().GetDefaultDwarfVersion();
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
// You might think that it would be ok to set DebugInfoKind outside of
// the guard for source type, however there is a test which asserts
// that some assembler invocation receives no -debug-info-kind,
// and it's not clear whether that test is just overly restrictive.
DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor
: codegenoptions::NoDebugInfo);
// Add the -fdebug-compilation-dir flag if needed.
addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS());
addDebugPrefixMapArg(getToolChain().getDriver(), Args, CmdArgs);
// Set the AT_producer to the clang version when using the integrated
// assembler on assembly source files.
CmdArgs.push_back("-dwarf-debug-producer");
CmdArgs.push_back(Args.MakeArgString(getClangFullVersion()));
// And pass along -I options
Args.AddAllArgs(CmdArgs, options::OPT_I);
}
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
llvm::DebuggerKind::Default);
renderDwarfFormat(D, Triple, Args, CmdArgs, DwarfVersion);
RenderDebugInfoCompressionArgs(Args, CmdArgs, D, getToolChain());
// Handle -fPIC et al -- the relocation-model affects the assembler
// for some targets.
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
const char *RMName = RelocationModelName(RelocationModel);
if (RMName) {
CmdArgs.push_back("-mrelocation-model");
CmdArgs.push_back(RMName);
}
// Optionally embed the -cc1as level arguments into the debug info, for build
// analysis.
if (getToolChain().UseDwarfDebugFlags()) {
ArgStringList OriginalArgs;
for (const auto &Arg : Args)
Arg->render(Args, OriginalArgs);
SmallString<256> Flags;
const char *Exec = getToolChain().getDriver().getClangProgramPath();
EscapeSpacesAndBackslashes(Exec, Flags);
for (const char *OriginalArg : OriginalArgs) {
SmallString<128> EscapedArg;
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";
Flags += EscapedArg;
}
CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags));
}
// FIXME: Add -static support, once we have it.
// Add target specific flags.
switch (getToolChain().getArch()) {
default:
break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
AddMIPSTargetArgs(Args, CmdArgs);
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
// This isn't in AddARMTargetArgs because we want to do this for assembly
// only, not C/C++.
if (Args.hasFlag(options::OPT_mdefault_build_attributes,
options::OPT_mno_default_build_attributes, true)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-arm-add-build-attributes");
}
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
if (Args.hasArg(options::OPT_mmark_bti_property)) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-aarch64-mark-bti-property");
}
break;
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
AddRISCVTargetArgs(Args, CmdArgs);
break;
}
// Consume all the warning flags. Usually this would be handled more
// gracefully by -cc1 (warning about unknown warning flags, etc.), but -cc1as
// doesn't handle that, so rather than warning about unused flags that are
// actually used, we'll lie by omission instead.
// FIXME: Stop lying and consume only the appropriate driver flags
Args.ClaimAllArgs(options::OPT_W_Group);
CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
getToolChain().getDriver());
Args.AddAllArgs(CmdArgs, options::OPT_mllvm);
assert(Output.isFilename() && "Unexpected lipo output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
const llvm::Triple &T = getToolChain().getTriple();
Arg *A;
if (getDebugFissionKind(D, Args, A) == DwarfFissionKind::Split &&
T.isOSBinFormatELF()) {
CmdArgs.push_back("-split-dwarf-output");
CmdArgs.push_back(SplitDebugName(JA, Args, Input, Output));
}
if (Triple.isAMDGPU())
handleAMDGPUCodeObjectVersionOptions(D, Args, CmdArgs);
assert(Input.isFilename() && "Invalid input.");
CmdArgs.push_back(Input.getFilename());
const char *Exec = getToolChain().getDriver().getClangProgramPath();
if (D.CC1Main && !D.CCGenDiagnostics) {
// Invoke cc1as directly in this process.
C.addCommand(std::make_unique<CC1Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
} else {
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileUTF8(),
Exec, CmdArgs, Inputs, Output));
}
}
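// For illustration only (a sketch of the flags assembled above, not verbatim
// driver output): assembling foo.s with -g and -fPIC would lead to a -cc1as
// invocation roughly like
//   clang -cc1as ... -dwarf-debug-producer "clang version ..."
//     -mrelocation-model pic -o foo.o foo.s
// with -dwarf-debug-flags and -split-dwarf-output appended only when the
// corresponding options are in effect.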
// Begin OffloadBundler
void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with only one output is expected to refer to a bundling job.
assert(isa<OffloadBundlingJobAction>(JA) && "Expecting bundling job!");
// The bundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -outputs=bundled_file
// -inputs=unbundled_file_host,unbundled_file_tgt1,unbundled_file_tgt2
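// For example (illustrative file names and triples), bundling an x86_64 host
// module with one OpenMP device module might produce:
//   clang-offload-bundler -type=bc
//     -targets=host-x86_64-unknown-linux-gnu,openmp-nvptx64-nvidia-cuda
//     -outputs=a.bc -inputs=a-host.bc,a-openmp.bc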
ArgStringList CmdArgs;
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Output.getType())));
assert(JA.getInputs().size() == Inputs.size() &&
"Not have inputs for all dependence actions??");
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
Triples += ',';
// Find ToolChain for this input.
Action::OffloadKind CurKind = Action::OFK_Host;
const ToolChain *CurTC = &getToolChain();
const Action *CurDep = JA.getInputs()[I];
if (const auto *OA = dyn_cast<OffloadAction>(CurDep)) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurKind = A->getOffloadingDeviceKind();
CurTC = TC;
});
}
Triples += Action::GetOffloadKindName(CurKind);
Triples += "-";
std::string NormalizedTriple = CurTC->getTriple().normalize();
Triples += NormalizedTriple;
if (CurDep->getOffloadingArch() != nullptr) {
// If OffloadArch is present it can only appear as the 6th hyphen-separated
// field of the Bundle Entry ID. So, pad the required number of hyphens in
// the Triple.
for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
Triples += "-";
Triples += CurDep->getOffloadingArch();
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-outputs=") + Output.getFilename()));
// Get unbundled files command.
SmallString<128> UB;
UB += "-inputs=";
for (unsigned I = 0; I < Inputs.size(); ++I) {
if (I)
UB += ',';
// Find ToolChain for this input.
const ToolChain *CurTC = &getToolChain();
if (const auto *OA = dyn_cast<OffloadAction>(JA.getInputs()[I])) {
CurTC = nullptr;
OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) {
assert(CurTC == nullptr && "Expected one dependence!");
CurTC = TC;
});
UB += C.addTempFile(
C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
} else {
UB += CurTC->getInputFilename(Inputs[I]);
}
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, None, Output));
}
void OffloadBundler::ConstructJobMultipleOutputs(
Compilation &C, const JobAction &JA, const InputInfoList &Outputs,
const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const {
// The version with multiple outputs is expected to refer to an unbundling job.
auto &UA = cast<OffloadUnbundlingJobAction>(JA);
// The unbundling command looks like this:
// clang-offload-bundler -type=bc
// -targets=host-triple,openmp-triple1,openmp-triple2
// -inputs=input_file
// -outputs=unbundled_file_host,unbundled_file_tgt1,unbundled_file_tgt2
// -unbundle
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Expecting to unbundle a single file!");
InputInfo Input = Inputs.front();
// Get the type.
CmdArgs.push_back(TCArgs.MakeArgString(
Twine("-type=") + types::getTypeTempSuffix(Input.getType())));
// Get the targets.
SmallString<128> Triples;
Triples += "-targets=";
auto DepInfo = UA.getDependentActionsInfo();
for (unsigned I = 0; I < DepInfo.size(); ++I) {
if (I)
Triples += ',';
auto &Dep = DepInfo[I];
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
Triples += "-";
std::string NormalizedTriple =
Dep.DependentToolChain->getTriple().normalize();
Triples += NormalizedTriple;
if (!Dep.DependentBoundArch.empty()) {
// If OffloadArch is present it can only appear as the 6th hyphen-separated
// field of the Bundle Entry ID. So, pad the required number of hyphens in
// the Triple.
for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
Triples += "-";
Triples += Dep.DependentBoundArch;
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
// Get bundled file command.
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-inputs=") + Input.getFilename()));
// Get unbundled files command.
SmallString<128> UB;
UB += "-outputs=";
for (unsigned I = 0; I < Outputs.size(); ++I) {
if (I)
UB += ',';
UB += DepInfo[I].DependentToolChain->getInputFilename(Outputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, None, Outputs));
}
void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
// Add the "effective" target triple.
CmdArgs.push_back("-target");
CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
// Add the output file name.
assert(Output.isFilename() && "Invalid output.");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
// Add inputs.
for (const InputInfo &I : Inputs) {
assert(I.isFilename() && "Invalid input.");
CmdArgs.push_back(I.getFilename());
}
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, Inputs, Output));
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 83cab3ac00cb..0ffe95795381 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1,1739 +1,1740 @@
//===--- CommonArgs.cpp - Args handling for multiple toolchains -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CommonArgs.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "HIP.h"
#include "Hexagon.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/YAMLParser.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void renderRpassOptions(const ArgList &Args, ArgStringList &CmdArgs) {
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("--plugin-opt=-pass-remarks=") +
A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-missed=") + A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-analysis=") + A->getValue()));
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
SmallString<128> F;
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A)
F = A->getValue();
else if (Output.isFilename())
F = Output.getFilename();
assert(!F.empty() && "Cannot determine remarks output name.");
// Append "opt.ld.<format>" to the end of the file name.
CmdArgs.push_back(
Args.MakeArgString(Twine("--plugin-opt=opt-remarks-filename=") + F +
Twine(".opt.ld.") + Format));
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-passes=") + A->getValue()));
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-format=") + Format.data()));
}
static void renderRemarksHotnessOptions(const ArgList &Args,
ArgStringList &CmdArgs) {
if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness, false))
CmdArgs.push_back("--plugin-opt=opt-remarks-with-hotness");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-hotness-threshold=") + A->getValue()));
}
void tools::addPathIfExists(const Driver &D, const Twine &Path,
ToolChain::path_list &Paths) {
if (D.getVFS().exists(Path))
Paths.push_back(Path.str());
}
void tools::handleTargetFeaturesGroup(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier Group) {
for (const Arg *A : Args.filtered(Group)) {
StringRef Name = A->getOption().getName();
A->claim();
// Skip over "-m".
assert(Name.startswith("m") && "Invalid feature name.");
Name = Name.substr(1);
bool IsNegative = Name.startswith("no-");
if (IsNegative)
Name = Name.substr(3);
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
}
std::vector<StringRef>
tools::unifyTargetFeatures(const std::vector<StringRef> &Features) {
std::vector<StringRef> UnifiedFeatures;
// Find the last of each feature.
llvm::StringMap<unsigned> LastOpt;
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
StringRef Name = Features[I];
assert(Name[0] == '-' || Name[0] == '+');
LastOpt[Name.drop_front(1)] = I;
}
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
// If this feature was overridden, ignore it.
StringRef Name = Features[I];
llvm::StringMap<unsigned>::iterator LastI = LastOpt.find(Name.drop_front(1));
assert(LastI != LastOpt.end());
unsigned Last = LastI->second;
if (Last != I)
continue;
UnifiedFeatures.push_back(Name);
}
return UnifiedFeatures;
}
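// For example, unifying {"+sse2", "-sse2", "+avx"} keeps only the last
// occurrence of each feature name, yielding {"-sse2", "+avx"} with the order
// of the surviving entries preserved.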
void tools::addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs,
const char *ArgName, const char *EnvVar) {
const char *DirList = ::getenv(EnvVar);
bool CombinedArg = false;
if (!DirList)
return; // Nothing to do.
StringRef Name(ArgName);
if (Name.equals("-I") || Name.equals("-L") || Name.empty())
CombinedArg = true;
StringRef Dirs(DirList);
if (Dirs.empty()) // Empty string should not add '.'.
return;
StringRef::size_type Delim;
while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) {
if (Delim == 0) { // Leading colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else {
if (CombinedArg) {
CmdArgs.push_back(
Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
}
}
Dirs = Dirs.substr(Delim + 1);
}
if (Dirs.empty()) { // Trailing colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else { // Add the last path.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs));
}
}
}
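// For example, with ArgName "-L" (so CombinedArg is true), ':' as the path
// separator and LIBRARY_PATH=":/opt/lib:/usr/local/lib", this emits
// "-L." "-L/opt/lib" "-L/usr/local/lib"; a trailing separator would likewise
// append "-L." at the end.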
void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
const ArgList &Args, ArgStringList &CmdArgs,
const JobAction &JA) {
const Driver &D = TC.getDriver();
// Add extra linker input arguments which are not treated as inputs
// (constructed via -Xarch_).
Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
// LIBRARY_PATH is included before user inputs and is only supported on
// native toolchains.
if (!TC.isCrossCompiling())
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
for (const auto &II : Inputs) {
// If the current tool chain refers to an OpenMP offloading host, we
// should ignore inputs that refer to OpenMP offloading devices -
// they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
IA->isDeviceOffloading(Action::OFK_OpenMP)))
continue;
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
// Don't try to pass LLVM inputs unless we have native support.
D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString();
// Add filenames immediately.
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
continue;
}
// Otherwise, this is a linker input argument.
const Arg &A = II.getInputArg();
// Handle reserved library options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx))
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext))
TC.AddCCKextLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_z)) {
// Pass -z prefix for gcc linker compatibility.
A.claim();
A.render(Args, CmdArgs);
} else {
A.renderAsInput(Args, CmdArgs);
}
}
}
void tools::addLinkerCompressDebugSectionsOption(
const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
// GNU ld supports --compress-debug-sections=none|zlib|zlib-gnu|zlib-gabi
// whereas zlib is an alias to zlib-gabi. Therefore -gz=none|zlib|zlib-gnu
// are translated to --compress-debug-sections=none|zlib|zlib-gnu.
// -gz is not translated since ld's --compress-debug-sections option requires
// an argument.
if (const Arg *A = Args.getLastArg(options::OPT_gz_EQ)) {
StringRef V = A->getValue();
if (V == "none" || V == "zlib" || V == "zlib-gnu")
CmdArgs.push_back(Args.MakeArgString("--compress-debug-sections=" + V));
else
TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << V;
}
}
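// For example, -gz=zlib is forwarded as --compress-debug-sections=zlib, while
// any other value (say, -gz=zstd) is rejected with a diagnostic by the check
// above.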
void tools::AddTargetFeature(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier OnOpt, OptSpecifier OffOpt,
StringRef FeatureName) {
if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
if (A->getOption().matches(OnOpt))
Features.push_back(Args.MakeArgString("+" + FeatureName));
else
Features.push_back(Args.MakeArgString("-" + FeatureName));
}
}
/// Get the (LLVM) name of the AMDGPU gpu we are targeting.
static std::string getAMDGPUTargetGPU(const llvm::Triple &T,
const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
auto GPUName = getProcessorFromTargetID(T, A->getValue());
return llvm::StringSwitch<std::string>(GPUName)
.Cases("rv630", "rv635", "r600")
.Cases("rv610", "rv620", "rs780", "rs880")
.Case("rv740", "rv770")
.Case("palm", "cedar")
.Cases("sumo", "sumo2", "sumo")
.Case("hemlock", "cypress")
.Case("aruba", "cayman")
.Default(GPUName.str());
}
return "";
}
static std::string getLanaiTargetCPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
return A->getValue();
}
return "";
}
/// Get the (LLVM) name of the WebAssembly cpu we are targeting.
static StringRef getWebAssemblyTargetCPU(const ArgList &Args) {
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPU = A->getValue();
#ifdef __wasm__
// Handle "native" by examining the host. "native" isn't meaningful when
// cross compiling, so only support this when the host is also WebAssembly.
if (CPU == "native")
return llvm::sys::getHostCPUName();
#endif
return CPU;
}
return "generic";
}
std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
bool FromAs) {
Arg *A;
switch (T.getArch()) {
default:
return "";
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
return aarch64::getAArch64TargetCPU(Args, T, A);
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
StringRef MArch, MCPU;
arm::getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs);
return arm::getARMTargetCPU(MCPU, MArch, T);
}
case llvm::Triple::avr:
if (const Arg *A = Args.getLastArg(options::OPT_mmcu_EQ))
return A->getValue();
return "";
case llvm::Triple::m68k:
return m68k::getM68kTargetCPU(Args);
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, T, CPUName, ABIName);
return std::string(CPUName);
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
return A->getValue();
return "";
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le: {
std::string TargetCPUName = ppc::getPPCTargetCPU(Args);
// LLVM may default to generating code for the native CPU,
// but, like gcc, we default to a more generic option for
// each architecture (except on AIX).
if (!TargetCPUName.empty())
return TargetCPUName;
if (T.isOSAIX()) {
unsigned major, minor, unused_micro;
T.getOSVersion(major, minor, unused_micro);
// The minimal arch level moved from pwr4 for AIX7.1 to
// pwr7 for AIX7.2.
TargetCPUName =
(major < 7 || (major == 7 && minor < 2)) ? "pwr4" : "pwr7";
} else if (T.getArch() == llvm::Triple::ppc64le)
TargetCPUName = "ppc64le";
else if (T.getArch() == llvm::Triple::ppc64)
TargetCPUName = "ppc64";
else
TargetCPUName = "ppc";
return TargetCPUName;
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
return "";
case llvm::Triple::bpfel:
case llvm::Triple::bpfeb:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
if (T.getArch() == llvm::Triple::sparc && T.isOSSolaris())
return "v9";
return "";
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return x86::getX86TargetCPU(Args, T);
case llvm::Triple::hexagon:
return "hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
case llvm::Triple::lanai:
return getLanaiTargetCPU(Args);
case llvm::Triple::systemz:
return systemz::getSystemZTargetCPU(Args);
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
return getAMDGPUTargetGPU(T, Args);
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return std::string(getWebAssemblyTargetCPU(Args));
}
}
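// For example (following the cases above), ppc64le with no -mcpu yields
// "ppc64le", 32-bit SPARC on Solaris defaults to "v9", and WebAssembly
// targets fall back to "generic" unless -mcpu= is given.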
llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ);
if (!LtoJobsArg)
return {};
if (!llvm::get_threadpool_strategy(LtoJobsArg->getValue()))
D.Diag(diag::err_drv_invalid_int_value)
<< LtoJobsArg->getAsString(Args) << LtoJobsArg->getValue();
return LtoJobsArg->getValue();
}
// CloudABI uses -ffunction-sections and -fdata-sections by default.
bool tools::isUseSeparateSections(const llvm::Triple &Triple) {
return Triple.getOS() == llvm::Triple::CloudABI;
}
void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs, const InputInfo &Output,
const InputInfo &Input, bool IsThinLTO) {
const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
const Driver &D = ToolChain.getDriver();
if (llvm::sys::path::filename(Linker) != "ld.lld" &&
llvm::sys::path::stem(Linker) != "ld.lld") {
// Tell the linker to load the plugin. This has to come before
// AddLinkerInputs as gold requires -plugin to come before any -plugin-opt
// that -Wl might forward.
CmdArgs.push_back("-plugin");
#if defined(_WIN32)
const char *Suffix = ".dll";
#elif defined(__APPLE__)
const char *Suffix = ".dylib";
#else
const char *Suffix = ".so";
#endif
SmallString<1024> Plugin;
llvm::sys::path::native(
Twine(D.Dir) + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold" + Suffix,
Plugin);
CmdArgs.push_back(Args.MakeArgString(Plugin));
}
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
// Handle flags for selecting CPU variants.
std::string CPU = getCPUName(Args, ToolChain.getTriple());
if (!CPU.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// The optimization level matches
// CompilerInvocation.cpp:getOptimizationLevel().
StringRef OOpt;
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O)) {
OOpt = A->getValue();
if (OOpt == "g")
OOpt = "1";
else if (OOpt == "s" || OOpt == "z")
OOpt = "2";
} else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
if (!OOpt.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt));
}
if (Args.hasArg(options::OPT_gsplit_dwarf)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=dwo_dir=") +
Output.getFilename() + "_dwo"));
}
if (IsThinLTO)
CmdArgs.push_back("-plugin-opt=thinlto");
StringRef Parallelism = getLTOParallelism(Args, D);
if (!Parallelism.empty())
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));
// If an explicit debugger tuning argument appeared, pass it along.
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
options::OPT_ggdbN_Group)) {
if (A->getOption().matches(options::OPT_glldb))
CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb");
else if (A->getOption().matches(options::OPT_gsce))
CmdArgs.push_back("-plugin-opt=-debugger-tune=sce");
else if (A->getOption().matches(options::OPT_gdbx))
CmdArgs.push_back("-plugin-opt=-debugger-tune=dbx");
else
CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb");
}
bool UseSeparateSections =
isUseSeparateSections(ToolChain.getEffectiveTriple());
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-function-sections");
}
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-data-sections");
}
if (Arg *A = getLastProfileSampleUseArg(Args)) {
StringRef FName = A->getValue();
if (!llvm::sys::fs::exists(FName))
D.Diag(diag::err_drv_no_such_file) << FName;
else
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
}
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (CSPGOGenerateArg) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=cs-profile-generate"));
if (CSPGOGenerateArg->getOption().matches(
options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(CSPGOGenerateArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") + Path));
} else
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=cs-profile-path=default_%m.profraw"));
} else if (ProfileUseArg) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") +
Path));
}
// Pass an option to enable/disable the new pass manager.
if (auto *A = Args.getLastArg(options::OPT_flegacy_pass_manager,
options::OPT_fno_legacy_pass_manager)) {
if (A->getOption().matches(options::OPT_flegacy_pass_manager))
CmdArgs.push_back("-plugin-opt=legacy-pass-manager");
else
CmdArgs.push_back("-plugin-opt=new-pass-manager");
}
// Pass an option to enable pseudo probe emission.
if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
options::OPT_fno_pseudo_probe_for_profiling, false))
CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=stats-file=") + StatsFile));
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/true);
// Handle remark diagnostics on screen options: '-Rpass-*'.
renderRpassOptions(Args, CmdArgs);
// Handle serialized remarks options: '-fsave-optimization-record'
// and '-foptimization-record-*'.
if (willEmitRemarks(Args))
renderRemarksOptions(Args, CmdArgs, ToolChain.getEffectiveTriple(), Input,
Output);
// Handle remarks hotness/threshold related options.
renderRemarksHotnessOptions(Args, CmdArgs);
addMachineOutlinerArgs(D, Args, CmdArgs, ToolChain.getEffectiveTriple(),
/*IsLTO=*/true);
}
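// For illustration, an LTO link with -flto=thin and -O2 adds, among other
// things, -plugin-opt=thinlto and -plugin-opt=O2, plus -plugin and the
// LLVMgold plugin path whenever the linker is not lld.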
void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
// Enable -frtlib-add-rpath by default for the case of VE.
const bool IsVE = TC.getTriple().isVE();
bool DefaultValue = IsVE;
if (!Args.hasFlag(options::OPT_frtlib_add_rpath,
options::OPT_fno_rtlib_add_rpath, DefaultValue))
return;
std::string CandidateRPath = TC.getArchSpecificLibPath();
if (TC.getVFS().exists(CandidateRPath)) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str()));
}
}
bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC,
const ArgList &Args, bool ForceStaticHostRuntime,
bool IsOffloadingHost, bool GompNeedsRT) {
if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false))
return false;
Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args);
if (RTKind == Driver::OMPRT_Unknown)
// Already diagnosed.
return false;
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bstatic");
switch (RTKind) {
case Driver::OMPRT_OMP:
CmdArgs.push_back("-lomp");
break;
case Driver::OMPRT_GOMP:
CmdArgs.push_back("-lgomp");
break;
case Driver::OMPRT_IOMP5:
CmdArgs.push_back("-liomp5");
break;
case Driver::OMPRT_Unknown:
break;
}
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bdynamic");
if (RTKind == Driver::OMPRT_GOMP && GompNeedsRT)
CmdArgs.push_back("-lrt");
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
addArchSpecificRPath(TC, Args, CmdArgs);
return true;
}
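// For example, when RTKind is Driver::OMPRT_OMP this adds -lomp (plus
// -lomptarget if IsOffloadingHost), wrapped in -Bstatic/-Bdynamic when
// ForceStaticHostRuntime is set.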
static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, StringRef Sanitizer,
bool IsShared, bool IsWhole) {
// Wrap any static runtimes that must be forced into the executable in
// whole-archive.
if (IsWhole) CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(
Args, Sanitizer, IsShared ? ToolChain::FT_Shared : ToolChain::FT_Static));
if (IsWhole) CmdArgs.push_back("--no-whole-archive");
if (IsShared) {
addArchSpecificRPath(TC, Args, CmdArgs);
}
}
// Tries to use a file with the list of dynamic symbols that need to be exported
// from the runtime library. Returns true if the file was found.
static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) {
// Solaris ld defaults to --export-dynamic behaviour but doesn't support
// the option, so don't try to pass it.
if (TC.getTriple().getOS() == llvm::Triple::Solaris)
return true;
SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer));
if (llvm::sys::fs::exists(SanRT + ".syms")) {
CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms"));
return true;
}
return false;
}
static const char *getAsNeededOption(const ToolChain &TC, bool as_needed) {
assert(!TC.getTriple().isOSAIX() &&
"AIX linker does not support any form of --as-needed option yet.");
// While the Solaris 11.2 ld added --as-needed/--no-as-needed as aliases
// for the native forms -z ignore/-z record, they are missing in Illumos,
// so always use the native form.
if (TC.getTriple().isOSSolaris())
return as_needed ? "-zignore" : "-zrecord";
else
return as_needed ? "--as-needed" : "--no-as-needed";
}
void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
ArgStringList &CmdArgs) {
// Fuchsia never needs these. Any sanitizer runtimes with system
// dependencies use the `.deplibs` feature instead.
if (TC.getTriple().isOSFuchsia())
return;
// Force linking against the system libraries sanitizers depend on
// (see PR15823 for why this is necessary).
CmdArgs.push_back(getAsNeededOption(TC, false));
// There's no libpthread or librt on RTEMS & Android.
if (TC.getTriple().getOS() != llvm::Triple::RTEMS &&
!TC.getTriple().isAndroid()) {
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
}
CmdArgs.push_back("-lm");
// Not all OSes have libdl.
if (!TC.getTriple().isOSFreeBSD() && !TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD() &&
TC.getTriple().getOS() != llvm::Triple::RTEMS)
CmdArgs.push_back("-ldl");
// Required for backtrace on some OSes
if (TC.getTriple().isOSFreeBSD() ||
- TC.getTriple().isOSNetBSD())
+ TC.getTriple().isOSNetBSD() ||
+ TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lexecinfo");
}
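// For example, on FreeBSD the list above typically expands to
//   --no-as-needed -lpthread -lrt -lm -lexecinfo
// (and no -ldl); with the change above, OpenBSD now also links -lexecinfo.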
static void
collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
SmallVectorImpl<StringRef> &SharedRuntimes,
SmallVectorImpl<StringRef> &StaticRuntimes,
SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
SmallVectorImpl<StringRef> &HelperStaticRuntimes,
SmallVectorImpl<StringRef> &RequiredSymbols) {
const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
// Collect shared runtimes.
if (SanArgs.needsSharedRt()) {
if (SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("asan");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("asan-preinit");
}
if (SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("memprof");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("memprof-preinit");
}
if (SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("ubsan_minimal");
else
SharedRuntimes.push_back("ubsan_standalone");
}
if (SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("scudo_minimal");
else
SharedRuntimes.push_back("scudo");
}
if (SanArgs.needsTsanRt() && SanArgs.linkRuntimes())
SharedRuntimes.push_back("tsan");
if (SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt())
SharedRuntimes.push_back("hwasan_aliases");
else
SharedRuntimes.push_back("hwasan");
}
}
// The stats_client library is also statically linked into DSOs.
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("stats_client");
// Collect static runtimes.
if (Args.hasArg(options::OPT_shared)) {
// Don't link static runtimes into DSOs.
return;
}
// Each static runtime that has a DSO counterpart above is excluded below,
// but runtimes that exist only as static are not affected by needsSharedRt.
if (!SanArgs.needsSharedRt() && SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("asan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("asan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsMemProfRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("memprof");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("memprof_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt()) {
StaticRuntimes.push_back("hwasan_aliases");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_aliases_cxx");
} else {
StaticRuntimes.push_back("hwasan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_cxx");
}
}
if (SanArgs.needsDfsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("dfsan");
if (SanArgs.needsLsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("lsan");
if (SanArgs.needsMsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("msan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("msan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsTsanRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("tsan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("tsan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("ubsan_minimal");
} else {
StaticRuntimes.push_back("ubsan_standalone");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsSafeStackRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("safestack");
RequiredSymbols.push_back("__safestack_init");
}
if (!(SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes())) {
if (SanArgs.needsCfiRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("cfi");
if (SanArgs.needsCfiDiagRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("cfi_diag");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("stats");
RequiredSymbols.push_back("__sanitizer_stats_register");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("scudo_minimal");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx_minimal");
} else {
StaticRuntimes.push_back("scudo");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx");
}
}
}
// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
// C runtime, etc). Returns true if sanitizer system deps need to be linked in.
bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
SmallVector<StringRef, 4> SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols;
collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes,
RequiredSymbols);
const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
// Inject libfuzzer dependencies.
if (SanArgs.needsFuzzer() && SanArgs.linkRuntimes() &&
!Args.hasArg(options::OPT_shared)) {
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer", false, true);
if (SanArgs.needsFuzzerInterceptors())
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer_interceptors", false,
true);
if (!Args.hasArg(clang::driver::options::OPT_nostdlibxx)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
}
}
for (auto RT : SharedRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false);
for (auto RT : HelperStaticRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
bool AddExportDynamic = false;
for (auto RT : StaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto RT : NonWholeStaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto S : RequiredSymbols) {
CmdArgs.push_back("-u");
CmdArgs.push_back(Args.MakeArgString(S));
}
// If there is a static runtime with no dynamic list, force all the symbols
// to be dynamic to be sure we export sanitizer interface functions.
if (AddExportDynamic)
CmdArgs.push_back("--export-dynamic");
if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic)
CmdArgs.push_back("--export-dynamic-symbol=__cfi_check");
return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty();
}
bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
if (Args.hasArg(options::OPT_shared))
return false;
if (TC.getXRayArgs().needsXRayRt()) {
CmdArgs.push_back("-whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
for (const auto &Mode : TC.getXRayArgs().modeList())
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Mode));
CmdArgs.push_back("-no-whole-archive");
return true;
}
return false;
}
void tools::linkXRayRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) {
CmdArgs.push_back(getAsNeededOption(TC, false));
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
CmdArgs.push_back("-lm");
if (!TC.getTriple().isOSFreeBSD() &&
!TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-ldl");
}
bool tools::areOptimizationsEnabled(const ArgList &Args) {
// Find the last -O arg and see if it is non-zero.
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
return !A->getOption().matches(options::OPT_O0);
// Defaults to -O0.
return false;
}
const char *tools::SplitDebugName(const JobAction &JA, const ArgList &Args,
const InputInfo &Input,
const InputInfo &Output) {
auto AddPostfix = [JA](auto &F) {
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP)
F += (Twine("_") + JA.getOffloadingArch()).str();
F += ".dwo";
};
if (Arg *A = Args.getLastArg(options::OPT_gsplit_dwarf_EQ))
if (StringRef(A->getValue()) == "single")
return Args.MakeArgString(Output.getFilename());
Arg *FinalOutput = Args.getLastArg(options::OPT_o);
if (FinalOutput && Args.hasArg(options::OPT_c)) {
SmallString<128> T(FinalOutput->getValue());
llvm::sys::path::remove_filename(T);
llvm::sys::path::append(T, llvm::sys::path::stem(FinalOutput->getValue()));
AddPostfix(T);
return Args.MakeArgString(T);
} else {
// Use the compilation dir.
Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ);
SmallString<128> T(A ? A->getValue() : "");
SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput()));
AddPostfix(F);
T += F;
return Args.MakeArgString(T);
}
}
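// For example, "clang -c -gsplit-dwarf -o build/foo.o foo.c" names the DWARF
// output "build/foo.dwo"; for a HIP device compile the bound arch is folded
// in as well, e.g. "foo_gfx906.dwo" (gfx906 being purely illustrative).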
void tools::SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
const JobAction &JA, const ArgList &Args,
const InputInfo &Output, const char *OutFile) {
ArgStringList ExtractArgs;
ExtractArgs.push_back("--extract-dwo");
ArgStringList StripArgs;
StripArgs.push_back("--strip-dwo");
// Grabbing the output of the earlier compile step.
StripArgs.push_back(Output.getFilename());
ExtractArgs.push_back(Output.getFilename());
ExtractArgs.push_back(OutFile);
const char *Exec =
Args.MakeArgString(TC.GetProgramPath(CLANG_DEFAULT_OBJCOPY));
InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename());
// First extract the dwo sections.
C.addCommand(std::make_unique<Command>(JA, T,
ResponseFileSupport::AtFileCurCP(),
Exec, ExtractArgs, II, Output));
// Then remove them from the original .o file.
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II, Output));
}
// Claim options we don't want to warn about if they are unused. We do this
// for options that build systems might add but that are unused when, for
// example, assembling or only running the preprocessor.
void tools::claimNoWarnArgs(const ArgList &Args) {
// Don't warn about unused -f(no-)?lto. This can happen when we're
// preprocessing, precompiling or assembling.
Args.ClaimAllArgs(options::OPT_flto_EQ);
Args.ClaimAllArgs(options::OPT_flto);
Args.ClaimAllArgs(options::OPT_fno_lto);
}
Arg *tools::getLastProfileUseArg(const ArgList &Args) {
auto *ProfileUseArg = Args.getLastArg(
options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ,
options::OPT_fprofile_use, options::OPT_fprofile_use_EQ,
options::OPT_fno_profile_instr_use);
if (ProfileUseArg &&
ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use))
ProfileUseArg = nullptr;
return ProfileUseArg;
}
Arg *tools::getLastProfileSampleUseArg(const ArgList &Args) {
auto *ProfileSampleUseArg = Args.getLastArg(
options::OPT_fprofile_sample_use, options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile, options::OPT_fauto_profile_EQ,
options::OPT_fno_profile_sample_use, options::OPT_fno_auto_profile);
if (ProfileSampleUseArg &&
(ProfileSampleUseArg->getOption().matches(
options::OPT_fno_profile_sample_use) ||
ProfileSampleUseArg->getOption().matches(options::OPT_fno_auto_profile)))
return nullptr;
return Args.getLastArg(options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile_EQ);
}
/// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then,
/// smooshes them together with platform defaults, to decide whether
/// this compile should be using PIC mode or not. Returns a tuple of
/// (RelocationModel, PICLevel, IsPIE).
std::tuple<llvm::Reloc::Model, unsigned, bool>
tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
const llvm::Triple &EffectiveTriple = ToolChain.getEffectiveTriple();
const llvm::Triple &Triple = ToolChain.getTriple();
bool PIE = ToolChain.isPIEDefault();
bool PIC = PIE || ToolChain.isPICDefault();
// The Darwin/MachO default to use PIC does not apply when using -static.
if (Triple.isOSBinFormatMachO() && Args.hasArg(options::OPT_static))
PIE = PIC = false;
bool IsPICLevelTwo = PIC;
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
// Android-specific defaults for PIC/PIE
if (Triple.isAndroid()) {
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::aarch64:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
PIC = true; // "-fpic"
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
PIC = true; // "-fPIC"
IsPICLevelTwo = true;
break;
default:
break;
}
}
// OpenBSD-specific defaults for PIE
if (Triple.isOSOpenBSD()) {
switch (ToolChain.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::aarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
IsPICLevelTwo = false; // "-fpie"
break;
case llvm::Triple::ppc:
case llvm::Triple::sparcv9:
IsPICLevelTwo = true; // "-fPIE"
break;
default:
break;
}
}
// AMDGPU-specific defaults for PIC.
if (Triple.getArch() == llvm::Triple::amdgcn)
PIC = true;
// The last argument relating to either PIC or PIE wins, and no
// other argument is used. If the last argument is any flavor of the
// '-fno-...' arguments, both PIC and PIE are disabled. Any PIE
// option implicitly enables PIC at the same level.
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (Triple.isOSWindows() && LastPICArg &&
LastPICArg ==
Args.getLastArg(options::OPT_fPIC, options::OPT_fpic,
options::OPT_fPIE, options::OPT_fpie)) {
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastPICArg->getSpelling() << Triple.str();
if (Triple.getArch() == llvm::Triple::x86_64)
return std::make_tuple(llvm::Reloc::PIC_, 2U, false);
return std::make_tuple(llvm::Reloc::Static, 0U, false);
}
// Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness
// is forced, then neither PIC nor PIE flags will have any effect.
if (!ToolChain.isPICDefaultForced()) {
if (LastPICArg) {
Option O = LastPICArg->getOption();
if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) ||
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) {
PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie);
PIC =
PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic);
IsPICLevelTwo =
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC);
} else {
PIE = PIC = false;
if (EffectiveTriple.isPS4CPU()) {
Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ);
StringRef Model = ModelArg ? ModelArg->getValue() : "";
if (Model != "kernel") {
PIC = true;
ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic)
<< LastPICArg->getSpelling();
}
}
}
}
}
// Introduce a Darwin and PS4-specific hack. If the default is PIC, but the
// PIC level would've been set to level 1, force it back to level 2 PIC
// instead.
if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS4CPU()))
IsPICLevelTwo |= ToolChain.isPICDefault();
// These kernel flags are a trump card: they will disable PIC/PIE
// generation, independent of the argument order.
if (KernelOrKext &&
((!EffectiveTriple.isiOS() || EffectiveTriple.isOSVersionLT(6)) &&
!EffectiveTriple.isWatchOS()))
PIC = PIE = false;
if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) {
// This is a very special mode. It trumps the other modes, almost no one
// uses it, and it isn't even valid on any OS but Darwin.
if (!Triple.isOSDarwin())
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << Triple.str();
// FIXME: Warn when this flag trumps some other PIC or PIE flag.
// Only a forced PIC mode can cause the actual compile to have PIC defines
// etc., no flags are sufficient. This behavior was selected to closely
// match that of llvm-gcc and Apple GCC before that.
PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced();
return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2U : 0U, false);
}
bool EmbeddedPISupported;
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
EmbeddedPISupported = true;
break;
default:
EmbeddedPISupported = false;
break;
}
bool ROPI = false, RWPI = false;
Arg* LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi);
if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastROPIArg->getSpelling() << Triple.str();
ROPI = true;
}
Arg *LastRWPIArg = Args.getLastArg(options::OPT_frwpi, options::OPT_fno_rwpi);
if (LastRWPIArg && LastRWPIArg->getOption().matches(options::OPT_frwpi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastRWPIArg->getSpelling() << Triple.str();
RWPI = true;
}
// ROPI and RWPI are not compatible with PIC or PIE.
if ((ROPI || RWPI) && (PIC || PIE))
ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic);
if (Triple.isMIPS()) {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
// When targeting the N64 ABI, PIC is the default, except when the
// -mno-abicalls option is used. In that case we exit at the next check
// regardless of PIC being set below.
if (ABIName == "n64")
PIC = true;
// When targeting MIPS with -mno-abicalls, it's always static.
if (Args.hasArg(options::OPT_mno_abicalls))
return std::make_tuple(llvm::Reloc::Static, 0U, false);
// Unlike other architectures, MIPS, even with -fPIC/-mxgot/multigot,
// does not use PIC level 2 for historical reasons.
IsPICLevelTwo = false;
}
if (PIC)
return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2U : 1U, PIE);
llvm::Reloc::Model RelocM = llvm::Reloc::Static;
if (ROPI && RWPI)
RelocM = llvm::Reloc::ROPI_RWPI;
else if (ROPI)
RelocM = llvm::Reloc::ROPI;
else if (RWPI)
RelocM = llvm::Reloc::RWPI;
return std::make_tuple(RelocM, 0U, false);
}
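// For illustration: on a toolchain that is neither PIC nor PIE by default,
// -fPIC yields (PIC_, 2, false), -fpie yields (PIC_, 1, true), and no PIC/PIE
// flag at all yields (Static, 0, false); actual results also depend on the
// target-specific overrides handled above.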
// `-falign-functions` indicates that the functions should be aligned to a
// 16-byte boundary.
//
// `-falign-functions=1` is the same as `-fno-align-functions`.
//
// The scalar `n` in `-falign-functions=n` must be an integral value in the
// range [0, 65536]. If the value is not a power of two, it will be rounded
// up to the nearest power of two.
//
// If we return `0`, the frontend will default to the backend's preferred
// alignment.
//
// NOTE: icc only allows values in the range [0, 4096]. icc uses
// `-falign-functions` to mean `-falign-functions=16`. GCC defaults to the
// backend's preferred alignment. For unaligned functions, we default to the
// backend's preferred alignment.
unsigned tools::ParseFunctionAlignment(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_falign_functions,
options::OPT_falign_functions_EQ,
options::OPT_fno_align_functions);
if (!A || A->getOption().matches(options::OPT_fno_align_functions))
return 0;
if (A->getOption().matches(options::OPT_falign_functions))
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 65536)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value ? llvm::Log2_32_Ceil(std::min(Value, 65536u)) : Value;
}
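// For example, -falign-functions=32 returns log2(32) == 5, a non-power-of-two
// value such as 48 rounds up and returns 6, and both -falign-functions=1 and
// -fno-align-functions return 0.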
unsigned tools::ParseDebugDefaultVersion(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_fdebug_default_version);
if (!A)
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 5 ||
Value < 2)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value;
}
void tools::AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs) {
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, Args);
if (RelocationModel != llvm::Reloc::Static)
CmdArgs.push_back("-KPIC");
}
/// Determine whether Objective-C automated reference counting is
/// enabled.
bool tools::isObjCAutoRefCount(const ArgList &Args) {
return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false);
}
enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc };
static LibGccType getLibGccType(const ToolChain &TC, const Driver &D,
const ArgList &Args) {
if (Args.hasArg(options::OPT_static_libgcc) ||
Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie))
return LibGccType::StaticLibGcc;
if (Args.hasArg(options::OPT_shared_libgcc))
return LibGccType::SharedLibGcc;
// The Android NDK only provides libunwind.a, not libunwind.so.
if (TC.getTriple().isAndroid())
return LibGccType::StaticLibGcc;
// For MinGW, don't imply a shared libgcc here; we only want to return
// SharedLibGcc if that was explicitly requested.
if (D.CCCIsCXX() && !TC.getTriple().isOSCygMing())
return LibGccType::SharedLibGcc;
return LibGccType::UnspecifiedLibGcc;
}
// Gcc adds libgcc arguments in various ways:
//
// gcc <none>: -lgcc --as-needed -lgcc_s --no-as-needed
// g++ <none>: -lgcc_s -lgcc
// gcc shared: -lgcc_s -lgcc
// g++ shared: -lgcc_s -lgcc
// gcc static: -lgcc -lgcc_eh
// g++ static: -lgcc -lgcc_eh
// gcc static-pie: -lgcc -lgcc_eh
// g++ static-pie: -lgcc -lgcc_eh
//
// Also, certain targets need additional adjustments.
static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
ToolChain::UnwindLibType UNW = TC.GetUnwindLibType(Args);
// Targets that don't use unwind libraries.
if ((TC.getTriple().isAndroid() && UNW == ToolChain::UNW_Libgcc) ||
TC.getTriple().isOSIAMCU() || TC.getTriple().isOSBinFormatWasm() ||
UNW == ToolChain::UNW_None)
return;
LibGccType LGT = getLibGccType(TC, D, Args);
bool AsNeeded = LGT == LibGccType::UnspecifiedLibGcc &&
!TC.getTriple().isAndroid() &&
!TC.getTriple().isOSCygMing() && !TC.getTriple().isOSAIX();
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, true));
switch (UNW) {
case ToolChain::UNW_None:
return;
case ToolChain::UNW_Libgcc: {
if (LGT == LibGccType::StaticLibGcc)
CmdArgs.push_back("-lgcc_eh");
else
CmdArgs.push_back("-lgcc_s");
break;
}
case ToolChain::UNW_CompilerRT:
if (TC.getTriple().isOSAIX()) {
// AIX only has libunwind as a shared library. So do not pass
// anything in if -static is specified.
if (LGT != LibGccType::StaticLibGcc)
CmdArgs.push_back("-lunwind");
} else if (LGT == LibGccType::StaticLibGcc) {
CmdArgs.push_back("-l:libunwind.a");
} else if (TC.getTriple().isOSCygMing()) {
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-l:libunwind.dll.a");
else
// Let the linker choose between libunwind.dll.a and libunwind.a
// depending on what's available, and depending on the -static flag
CmdArgs.push_back("-lunwind");
} else {
CmdArgs.push_back("-l:libunwind.so");
}
break;
}
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, false));
}
static void AddLibgcc(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
LibGccType LGT = getLibGccType(TC, D, Args);
if (LGT != LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
AddUnwindLibrary(TC, D, CmdArgs, Args);
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
}
void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
// Make use of compiler-rt if --rtlib option is used
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args);
switch (RLT) {
case ToolChain::RLT_CompilerRT:
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins"));
AddUnwindLibrary(TC, D, CmdArgs, Args);
break;
case ToolChain::RLT_Libgcc:
// Make sure libgcc is not used under MSVC environment by default
if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
// Issue error diagnostic if libgcc is explicitly specified
// through command line as --rtlib option argument.
if (Args.hasArg(options::OPT_rtlib_EQ)) {
TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
<< Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
}
} else
AddLibgcc(TC, D, CmdArgs, Args);
break;
}
// On Android, the unwinder uses dl_iterate_phdr (or one of
// dl_unwind_find_exidx/__gnu_Unwind_Find_exidx on arm32) from libdl.so. For
// statically-linked executables, these functions come from libc.a instead.
if (TC.getTriple().isAndroid() && !Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_static_pie))
CmdArgs.push_back("-ldl");
}
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,
const Driver &D) {
const Arg *A = Args.getLastArg(options::OPT_save_stats_EQ);
if (!A)
return {};
StringRef SaveStats = A->getValue();
SmallString<128> StatsFile;
if (SaveStats == "obj" && Output.isFilename()) {
StatsFile.assign(Output.getFilename());
llvm::sys::path::remove_filename(StatsFile);
} else if (SaveStats != "cwd") {
D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << SaveStats;
return {};
}
StringRef BaseName = llvm::sys::path::filename(Input.getBaseInput());
llvm::sys::path::append(StatsFile, BaseName);
llvm::sys::path::replace_extension(StatsFile, "stats");
return StatsFile;
}
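// Illustrative examples, assuming a compilation of foo.c:
//   -save-stats=obj with "-o build/foo.o"  ->  build/foo.stats
//   -save-stats=cwd                        ->  foo.stats (in the current directory)
// Any other value is rejected with err_drv_invalid_value.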
void tools::addMultilibFlag(bool Enabled, const char *const Flag,
Multilib::flags_list &Flags) {
Flags.push_back(std::string(Enabled ? "+" : "-") + Flag);
}
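// Usage sketch: addMultilibFlag(true, "m64", Flags) appends "+m64", while
// addMultilibFlag(false, "m64", Flags) appends "-m64".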
void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Args.hasArg(options::OPT_mbranches_within_32B_boundaries)) {
addArg(Twine("-x86-branches-within-32B-boundaries"));
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) {
StringRef Value = A->getValue();
unsigned Boundary;
if (Value.getAsInteger(10, Boundary) || Boundary < 16 ||
!llvm::isPowerOf2_64(Boundary)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-align-branch-boundary=" + Twine(Boundary));
}
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) {
std::string AlignBranch;
for (StringRef T : A->getValues()) {
if (T != "fused" && T != "jcc" && T != "jmp" && T != "call" &&
T != "ret" && T != "indirect")
D.Diag(diag::err_drv_invalid_malign_branch_EQ)
<< T << "fused, jcc, jmp, call, ret, indirect";
if (!AlignBranch.empty())
AlignBranch += '+';
AlignBranch += T;
}
addArg("-x86-align-branch=" + Twine(AlignBranch));
}
if (const Arg *A = Args.getLastArg(options::OPT_mpad_max_prefix_size_EQ)) {
StringRef Value = A->getValue();
unsigned PrefixSize;
if (Value.getAsInteger(10, PrefixSize)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-pad-max-prefix-size=" + Twine(PrefixSize));
}
}
}
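// Illustrative mapping for a non-LTO compilation:
//   -mbranches-within-32B-boundaries -> -mllvm -x86-branches-within-32B-boundaries
//   -malign-branch-boundary=32       -> -mllvm -x86-align-branch-boundary=32
//   -malign-branch=fused+jcc         -> -mllvm -x86-align-branch=fused+jcc
// Under LTO each value is passed as -plugin-opt=<value> instead.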
static llvm::opt::Arg *
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
// The last of -mcode-object-v3, -mno-code-object-v3 and
// -mcode-object-version=<version> wins.
return Args.getLastArg(options::OPT_mcode_object_v3_legacy,
options::OPT_mno_code_object_v3_legacy,
options::OPT_mcode_object_version_EQ);
}
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 2;
const unsigned MaxCodeObjVer = 4;
// Emit warnings for legacy options even if they are overridden.
if (Args.hasArg(options::OPT_mno_code_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mno-code-object-v3"
<< "-mcode-object-version=2";
if (Args.hasArg(options::OPT_mcode_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mcode-object-v3"
<< "-mcode-object-version=3";
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_version_EQ) {
unsigned CodeObjVer = MaxCodeObjVer;
auto Remnant =
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
if (Remnant || CodeObjVer < MinCodeObjVer || CodeObjVer > MaxCodeObjVer)
D.Diag(diag::err_drv_invalid_int_value)
<< CodeObjArg->getAsString(Args) << CodeObjArg->getValue();
}
}
}
unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 4; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mno_code_object_v3_legacy) {
CodeObjVer = 2;
} else if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_v3_legacy) {
CodeObjVer = 3;
} else {
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
}
}
return CodeObjVer;
}
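// Illustrative mapping of driver options to the returned version:
//   -mno-code-object-v3        -> 2
//   -mcode-object-v3           -> 3
//   -mcode-object-version=<n>  -> n
//   (no option)                -> 4 (the default above)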
bool tools::haveAMDGPUCodeObjectVersionArgument(
const Driver &D, const llvm::opt::ArgList &Args) {
return getAMDGPUCodeObjectArgument(D, Args) != nullptr;
}
void tools::addMachineOutlinerArgs(const Driver &D,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Arg *A = Args.getLastArg(options::OPT_moutline,
options::OPT_mno_outline)) {
if (A->getOption().matches(options::OPT_moutline)) {
// We only support -moutline on AArch64 and ARM targets right now. If
// we are not compiling for one of these, emit a warning and ignore the flag.
// Otherwise, add the proper -mllvm flags.
if (!(Triple.isARM() || Triple.isThumb() ||
Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)) {
D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
} else {
addArg(Twine("-enable-machine-outliner"));
}
} else {
// Disable all outlining behaviour.
addArg(Twine("-enable-machine-outliner=never"));
}
}
}
void tools::addOpenMPDeviceRTL(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix,
const llvm::Triple &Triple) {
SmallVector<StringRef, 8> LibraryPaths;
// Add user defined library paths from LIBRARY_PATH.
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
if (LibPath) {
SmallVector<StringRef, 8> Frags;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
for (StringRef Path : Frags)
LibraryPaths.emplace_back(Path.trim());
}
// Add path to lib / lib64 folder.
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
LibraryPaths.emplace_back(DefaultLibPath.c_str());
OptSpecifier LibomptargetBCPathOpt =
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgcn_bc_path_EQ
: options::OPT_libomptarget_nvptx_bc_path_EQ;
StringRef ArchPrefix = Triple.isAMDGCN() ? "amdgcn" : "nvptx";
// First, check whether the user explicitly specified a bitcode library.
if (const Arg *A = DriverArgs.getLastArg(LibomptargetBCPathOpt)) {
std::string LibOmpTargetName(A->getValue());
if (llvm::sys::fs::exists(LibOmpTargetName)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
} else {
D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
<< LibOmpTargetName;
}
} else {
bool FoundBCLibrary = false;
std::string LibOmpTargetName =
"libomptarget-" + BitcodeSuffix.str() + ".bc";
for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibOmpTargetFile(LibraryPath);
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
if (llvm::sys::fs::exists(LibOmpTargetFile)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
FoundBCLibrary = true;
break;
}
}
if (!FoundBCLibrary)
D.Diag(diag::err_drv_omp_offload_target_missingbcruntime)
<< LibOmpTargetName << ArchPrefix;
}
}
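// Illustrative sketch with a hypothetical BitcodeSuffix of "nvptx64-sm_70":
// the directories from LIBRARY_PATH and <driver dir>/../lib<CLANG_LIBDIR_SUFFIX>
// are searched for "libomptarget-nvptx64-sm_70.bc", and the first match is
// passed to the frontend via -mlink-builtin-bitcode.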
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
index 59d58aadb687..c4e840de86e1 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/HIP.cpp
@@ -1,487 +1,458 @@
//===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "HIP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetID.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
#if defined(_WIN32) || defined(_WIN64)
#define NULL_FILE "nul"
#else
#define NULL_FILE "/dev/null"
#endif
namespace {
const unsigned HIPCodeObjectAlign = 4096;
} // namespace
void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
const llvm::opt::ArgList &Args) const {
// Construct lld command.
// The output from ld.lld is an HSA code object file.
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
"-plugin-opt=-amdgpu-internalize-symbols"};
auto &TC = getToolChain();
auto &D = TC.getDriver();
assert(!Inputs.empty() && "Must have at least one input.");
bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
// Extract all the -m options
std::vector<llvm::StringRef> Features;
amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
// Add features, such as cumode, to -mattr.
std::string MAttrString = "-plugin-opt=-mattr=";
for (auto OneFeature : unifyTargetFeatures(Features)) {
MAttrString.append(Args.MakeArgString(OneFeature));
if (OneFeature != Features.back())
MAttrString.append(",");
}
if (!Features.empty())
LldArgs.push_back(Args.MakeArgString(MAttrString));
// ToDo: Remove this option once the AMDGPU backend supports ISA-level linking.
// Since the AMDGPU backend currently does not support ISA-level linking, all
// called functions need to be imported.
if (IsThinLTO)
LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
LldArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
}
if (C.getDriver().isSaveTempsEnabled())
LldArgs.push_back("-save-temps");
addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
LldArgs.append({"-o", Output.getFilename()});
for (auto Input : Inputs)
LldArgs.push_back(Input.getFilename());
if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
false))
llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
LldArgs.push_back(Args.MakeArgString(BCFile));
});
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Lld, LldArgs, Inputs, Output));
}
// Construct a clang-offload-bundler command to bundle code objects for
// different GPUs into a HIP fat binary.
void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
StringRef OutputFileName, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args, const Tool& T) {
// Construct a clang-offload-bundler command to bundle object files
// for different GPU archs.
ArgStringList BundlerArgs;
BundlerArgs.push_back(Args.MakeArgString("-type=o"));
BundlerArgs.push_back(
Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
// ToDo: Remove the dummy host binary entry which is required by
// clang-offload-bundler.
std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
std::string BundlerInputArg = "-inputs=" NULL_FILE;
// For code object version 2 and 3, the offload kind in bundle ID is 'hip'
// for backward compatibility. For code object version 4 and greater, the
// offload kind in bundle ID is 'hipv4'.
std::string OffloadKind = "hip";
if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
OffloadKind = OffloadKind + "v4";
for (const auto &II : Inputs) {
const auto* A = II.getAction();
BundlerTargetArg = BundlerTargetArg + "," + OffloadKind +
"-amdgcn-amd-amdhsa--" +
StringRef(A->getOffloadingArch()).str();
BundlerInputArg = BundlerInputArg + "," + II.getFilename();
}
BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
std::string Output = std::string(OutputFileName);
auto BundlerOutputArg =
Args.MakeArgString(std::string("-outputs=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);
const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs,
InputInfo(&JA, Args.MakeArgString(Output))));
}
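// Illustrative resulting command line (the gfx908 arch and file names are
// hypothetical):
//   clang-offload-bundler -type=o -bundle-align=4096 \
//     -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx908 \
//     -inputs=/dev/null,a-gfx908.o -outputs=a.hipfb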
/// Add the generated HIP object file, which has the device images embedded,
/// to the host link line. Using MC directives, embed the device code and
/// define the symbols required by code generation so that the image can be
/// retrieved at runtime.
void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
Compilation &C, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const JobAction &JA) const {
const ToolChain &TC = getToolChain();
std::string Name =
std::string(llvm::sys::path::stem(Output.getFilename()));
// Create the temporary object-file generator input (.mcin) and the offload
// bundle file (.hipfb).
// Keep them if save-temps is enabled.
const char *McinFile;
const char *BundleFile;
if (C.getDriver().isSaveTempsEnabled()) {
McinFile = C.getArgs().MakeArgString(Name + ".mcin");
BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
} else {
auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
}
constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
// Create a buffer to write the contents of the temp obj generator.
std::string ObjBuffer;
llvm::raw_string_ostream ObjStream(ObjBuffer);
// Add MC directives to embed the target binaries. The image is aligned to
// HIPCodeObjectAlign bytes. This is not mandatory, but it increases the
// likelihood that the data is aligned with a cache block on common host
// machines.
ObjStream << "# HIP Object Generator\n";
ObjStream << "# *** Automatically generated by Clang ***\n";
ObjStream << " .protected __hip_fatbin\n";
ObjStream << " .type __hip_fatbin,@object\n";
ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
ObjStream << " .globl __hip_fatbin\n";
ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
<< "\n";
ObjStream << "__hip_fatbin:\n";
ObjStream << " .incbin \"" << BundleFile << "\"\n";
ObjStream.flush();
// Dump the contents of the temporary object-file generator if the user
// requested it. We support this option to enable testing of behavior with -###.
if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
llvm::errs() << ObjBuffer;
// Open script file and write the contents.
std::error_code EC;
llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
if (EC) {
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
return;
}
Objf << ObjBuffer;
ArgStringList McArgs{"-o", Output.getFilename(),
McinFile, "--filetype=obj"};
const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Mc, McArgs, Inputs, Output));
}
// For amdgcn, the inputs of the linker job are device bitcode and the output
// is an object file. It calls the llvm-link, opt, llc, and lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
if (Inputs.size() > 0 &&
Inputs[0].getType() == types::TY_Image &&
JA.getType() == types::TY_Object)
return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
if (JA.getType() == types::TY_HIP_FATBIN)
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
return constructLldCommand(C, JA, Inputs, Output, Args);
}
HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const ArgList &Args)
: ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
// Look up binaries in the driver directory; this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
}
void HIPToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
assert(DeviceOffloadingKind == Action::OFK_HIP &&
"Only HIP offloading kinds are supported for GPUs.");
CC1Args.push_back("-fcuda-is-device");
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
if (!MaxThreadsPerBlock.empty()) {
std::string ArgStr =
std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
}
CC1Args.push_back("-fcuda-allow-variadic-functions");
// Default to "hidden" visibility, as object-level linking will not be
// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat)) {
CC1Args.append({"-fvisibility", "hidden"});
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
});
}
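// Illustrative sketch: for a default HIP device compilation (no -fgpu-rdc, no
// explicit -fvisibility), the cc1 job typically receives roughly:
//   -fcuda-is-device -mllvm -amdgpu-internalize-symbols
//   -fcuda-allow-variadic-functions -fvisibility hidden
//   -fapply-global-visibility-to-externs
//   -mlink-builtin-bitcode <device-lib>.bc   (one per device library)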
llvm::opt::DerivedArgList *
HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
for (Arg *A : Args) {
if (!shouldSkipArgument(A))
DAL->append(A);
}
if (!BoundArch.empty()) {
DAL->eraseArg(options::OPT_mcpu_EQ);
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
checkTargetID(*DAL);
}
return DAL;
}
Tool *HIPToolChain::buildLinker() const {
assert(getTriple().getArch() == llvm::Triple::amdgcn);
return new tools::AMDGCN::Linker(*this);
}
void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
HostTC.addClangWarningOptions(CC1Args);
}
ToolChain::CXXStdlibType
HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
return HostTC.GetCXXStdlibType(Args);
}
void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
}
void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}
void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
ArgStringList &CC1Args) const {
HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}
void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
}
SanitizerMask HIPToolChain::getSupportedSanitizers() const {
// The HIPToolChain only supports sanitizers in the sense that it allows
// sanitizer arguments on the command line if they are supported by the host
// toolchain. The HIPToolChain will actually ignore any command line
// arguments for any of these "supported" sanitizers. That means that no
// sanitization of device code is actually supported at this time.
//
// This behavior is necessary because the host and device toolchains
// invocations often share the command line, so the device toolchain must
// tolerate flags meant only for the host toolchain.
return HostTC.getSupportedSanitizers();
}
VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
const ArgList &Args) const {
return HostTC.computeMSVCVersion(D, Args);
}
llvm::SmallVector<std::string, 12>
HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::SmallVector<std::string, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
ArgStringList LibraryPaths;
// Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
for (auto Path : RocmInstallation.getRocmDeviceLibPathArg())
LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
// Maintain compatibility with --hip-device-lib.
auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
if (!BCLibArgs.empty()) {
llvm::for_each(BCLibArgs, [&](StringRef BCName) {
StringRef FullName;
for (std::string LibraryPath : LibraryPaths) {
SmallString<128> Path(LibraryPath);
llvm::sys::path::append(Path, BCName);
FullName = Path;
if (llvm::sys::fs::exists(FullName)) {
BCLibs.push_back(FullName.str());
return;
}
}
getDriver().Diag(diag::err_drv_no_such_file) << BCName;
});
} else {
if (!RocmInstallation.hasDeviceLibrary()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return {};
}
StringRef GpuArch = getGPUArch(DriverArgs);
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
- (void)GpuArch;
- auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
- const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
- std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
- if (LibDeviceFile.empty()) {
- getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
- return {};
- }
// If --hip-device-lib is not set, add the default bitcode libraries.
- // TODO: There are way too many flags that change this. Do we need to check
- // them all?
- bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
- options::OPT_fno_gpu_flush_denormals_to_zero,
- getDefaultDenormsAreZeroForTarget(Kind));
- bool FiniteOnly =
- DriverArgs.hasFlag(options::OPT_ffinite_math_only,
- options::OPT_fno_finite_math_only, false);
- bool UnsafeMathOpt =
- DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations, false);
- bool FastRelaxedMath = DriverArgs.hasFlag(
- options::OPT_ffast_math, options::OPT_fno_fast_math, false);
- bool CorrectSqrt = DriverArgs.hasFlag(
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
- bool Wave64 = isWave64(DriverArgs, Kind);
-
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
auto AsanRTL = RocmInstallation.getAsanRTLPath();
if (AsanRTL.empty()) {
unsigned DiagID = getDriver().getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
"AMDGPU address sanitizer runtime library (asanrtl) is not found. "
"Please install ROCm device library which supports address "
"sanitizer");
getDriver().Diag(DiagID);
return {};
} else
BCLibs.push_back(AsanRTL.str());
}
// Add the HIP specific bitcode library.
BCLibs.push_back(RocmInstallation.getHIPPath().str());
- // Add the generic set of libraries.
- BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
- FastRelaxedMath, CorrectSqrt));
+ // Add common device libraries like ocml etc.
+ BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str()));
// Add instrument lib.
auto InstLib =
DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
if (InstLib.empty())
return BCLibs;
if (llvm::sys::fs::exists(InstLib))
BCLibs.push_back(InstLib.str());
else
getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
}
return BCLibs;
}
void HIPToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
auto PTID = getParsedTargetID(DriverArgs);
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< PTID.OptionalTargetID.getValue();
return;
}
assert(PTID.OptionalFeatures && "Invalid return from getParsedTargetID");
auto &FeatureMap = PTID.OptionalFeatures.getValue();
// Sanitizer is not supported with xnack-.
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
options::OPT_fno_gpu_sanitize, false)) {
auto Loc = FeatureMap.find("xnack");
if (Loc != FeatureMap.end() && !Loc->second) {
auto &Diags = getDriver().getDiags();
auto DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error,
"'-fgpu-sanitize' is not compatible with offload arch '%0'. "
"Use an offload arch without 'xnack-' instead");
Diags.Report(DiagID) << PTID.OptionalTargetID.getValue();
}
}
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
index e162165b2561..89828fbb6f5f 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -1,316 +1,323 @@
//===--- OpenBSD.cpp - OpenBSD ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "OpenBSD.h"
#include "Arch/Mips.h"
#include "Arch/Sparc.h"
#include "CommonArgs.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
switch (getToolChain().getArch()) {
case llvm::Triple::x86:
// When building 32-bit code on OpenBSD/amd64, we have to explicitly
// instruct the base system's as(1) to assemble 32-bit code.
CmdArgs.push_back("--32");
break;
case llvm::Triple::ppc:
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-many");
break;
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
std::string CPU = getCPUName(Args, getToolChain().getTriple());
CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
CmdArgs.push_back("-mabi");
CmdArgs.push_back(mips::getGnuCompatibleMipsABIName(ABIName).data());
if (getToolChain().getTriple().isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
default:
break;
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::OpenBSD &ToolChain =
static_cast<const toolchains::OpenBSD &>(getToolChain());
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (ToolChain.getArch() == llvm::Triple::mips64)
CmdArgs.push_back("-EB");
else if (ToolChain.getArch() == llvm::Triple::mips64el)
CmdArgs.push_back("-EL");
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
CmdArgs.push_back("-e");
CmdArgs.push_back("__start");
}
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
} else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld.so");
}
}
if (Args.hasArg(options::OPT_pie))
CmdArgs.push_back("-pie");
if (Args.hasArg(options::OPT_nopie) || Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-nopie");
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
const char *crt0 = nullptr;
const char *crtbegin = nullptr;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
crt0 = "gcrt0.o";
else if (Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_nopie))
crt0 = "rcrt0.o";
else
crt0 = "crt0.o";
crtbegin = "crtbegin.o";
} else {
crtbegin = "crtbeginS.o";
}
if (crt0)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt0)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
ToolChain.AddFilePathLibArgs(Args, CmdArgs);
Args.AddAllArgs(CmdArgs, {options::OPT_T_Group, options::OPT_e,
options::OPT_s, options::OPT_t,
options::OPT_Z_Flag, options::OPT_r});
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ // Use the static OpenMP runtime with -static-openmp
+ bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
+ !Args.hasArg(options::OPT_static);
+ addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
+
if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args))
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lm_p");
else
CmdArgs.push_back("-lm");
}
if (NeedsSanitizerDeps) {
CmdArgs.push_back(ToolChain.getCompilerRTArgString(Args, "builtins"));
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
}
if (NeedsXRayDeps) {
CmdArgs.push_back(ToolChain.getCompilerRTArgString(Args, "builtins"));
linkXRayRuntimeDeps(ToolChain, CmdArgs);
}
// FIXME: For some reason GCC passes -lgcc before adding
// the default system libraries. Just mimic this for now.
CmdArgs.push_back("-lcompiler_rt");
if (Args.hasArg(options::OPT_pthread)) {
if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lpthread_p");
else
CmdArgs.push_back("-lpthread");
}
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back("-lc_p");
else
CmdArgs.push_back("-lc");
}
CmdArgs.push_back("-lcompiler_rt");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
const char *crtend = nullptr;
if (!Args.hasArg(options::OPT_shared))
crtend = "crtend.o";
else
crtend = "crtendS.o";
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
}
+ ToolChain.addProfileRTLibs(Args, CmdArgs);
+
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
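// Illustrative link line for a plain "clang t.c" on OpenBSD, with paths
// abbreviated (exact output depends on the options passed):
//   ld -e __start --eh-frame-hdr -Bdynamic -dynamic-linker /usr/libexec/ld.so \
//     -o a.out crt0.o crtbegin.o t.o -lcompiler_rt -lc -lcompiler_rt crtend.o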
SanitizerMask OpenBSD::getSupportedSanitizers() const {
const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
// For future use, only UBsan at the moment
SanitizerMask Res = ToolChain::getSupportedSanitizers();
if (IsX86 || IsX86_64) {
Res |= SanitizerKind::Vptr;
Res |= SanitizerKind::Fuzzer;
Res |= SanitizerKind::FuzzerNoLink;
}
return Res;
}
/// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
}
void OpenBSD::AddClangSystemIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
const Driver &D = getDriver();
if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
SmallString<128> Dir(D.ResourceDir);
llvm::sys::path::append(Dir, "include");
addSystemInclude(DriverArgs, CC1Args, Dir.str());
}
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Check for configure-time C include directories.
StringRef CIncludeDirs(C_INCLUDE_DIRS);
if (CIncludeDirs != "") {
SmallVector<StringRef, 5> dirs;
CIncludeDirs.split(dirs, ":");
for (StringRef dir : dirs) {
StringRef Prefix =
llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : "";
addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
}
return;
}
addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
}
void OpenBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
addSystemInclude(DriverArgs, CC1Args,
getDriver().SysRoot + "/usr/include/c++/v1");
}
void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
bool Profiling = Args.hasArg(options::OPT_pg);
CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++");
CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread");
}
std::string OpenBSD::getCompilerRT(const ArgList &Args,
StringRef Component,
FileType Type) const {
SmallString<128> Path(getDriver().SysRoot);
llvm::sys::path::append(Path, "/usr/lib/libcompiler_rt.a");
return std::string(Path.str());
}
Tool *OpenBSD::buildAssembler() const {
return new tools::openbsd::Assembler(*this);
}
Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); }
bool OpenBSD::HasNativeLLVMSupport() const { return true; }
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h b/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
index f801e5426aa4..cc4e1a4dd96a 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -1,1486 +1,1558 @@
/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__
#ifndef __OPENMP_NVPTX__
#if CUDA_VERSION < 9000
#error This file is intended to be used with CUDA-9+ only.
#endif
#endif
// __DEVICE__ is a helper macro with a common set of attributes for the wrappers
// we implement in this file. We need static in order to avoid emitting unused
// functions, and __forceinline__ helps inline these wrappers at -O1.
#pragma push_macro("__DEVICE__")
#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __forceinline__
#endif
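// Illustrative expansion: in a CUDA build,
//   __DEVICE__ int __clz(int __a);
// becomes
//   static __device__ __forceinline__ int __clz(int __a);
// while under __OPENMP_NVPTX__ it becomes
//   static __attribute__((always_inline, nothrow)) int __clz(int __a);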
__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }
__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }
__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }
__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }
__DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
-__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
+__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
-__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
+__DEVICE__ void __attribute__((overloadable)) __brkpt(void) {
+ __asm__ __volatile__("brkpt;");
+}
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_byte_perm(__a, __b, __c);
}
__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }
__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }
__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }
__DEVICE__ double __dAtomicAdd(double *__p, double __v) {
return __nvvm_atom_add_gen_d(__p, __v);
}
__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {
return __nvvm_atom_cta_add_gen_d(__p, __v);
}
__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {
return __nvvm_atom_sys_add_gen_d(__p, __v);
}
__DEVICE__ double __dadd_rd(double __a, double __b) {
return __nv_dadd_rd(__a, __b);
}
__DEVICE__ double __dadd_rn(double __a, double __b) {
return __nv_dadd_rn(__a, __b);
}
__DEVICE__ double __dadd_ru(double __a, double __b) {
return __nv_dadd_ru(__a, __b);
}
__DEVICE__ double __dadd_rz(double __a, double __b) {
return __nv_dadd_rz(__a, __b);
}
__DEVICE__ double __ddiv_rd(double __a, double __b) {
return __nv_ddiv_rd(__a, __b);
}
__DEVICE__ double __ddiv_rn(double __a, double __b) {
return __nv_ddiv_rn(__a, __b);
}
__DEVICE__ double __ddiv_ru(double __a, double __b) {
return __nv_ddiv_ru(__a, __b);
}
__DEVICE__ double __ddiv_rz(double __a, double __b) {
return __nv_ddiv_rz(__a, __b);
}
__DEVICE__ double __dmul_rd(double __a, double __b) {
return __nv_dmul_rd(__a, __b);
}
__DEVICE__ double __dmul_rn(double __a, double __b) {
return __nv_dmul_rn(__a, __b);
}
__DEVICE__ double __dmul_ru(double __a, double __b) {
return __nv_dmul_ru(__a, __b);
}
__DEVICE__ double __dmul_rz(double __a, double __b) {
return __nv_dmul_rz(__a, __b);
}
__DEVICE__ float __double2float_rd(double __a) {
return __nv_double2float_rd(__a);
}
__DEVICE__ float __double2float_rn(double __a) {
return __nv_double2float_rn(__a);
}
__DEVICE__ float __double2float_ru(double __a) {
return __nv_double2float_ru(__a);
}
__DEVICE__ float __double2float_rz(double __a) {
return __nv_double2float_rz(__a);
}
__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }
__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }
__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }
__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }
__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }
__DEVICE__ long long __double2ll_rd(double __a) {
return __nv_double2ll_rd(__a);
}
__DEVICE__ long long __double2ll_rn(double __a) {
return __nv_double2ll_rn(__a);
}
__DEVICE__ long long __double2ll_ru(double __a) {
return __nv_double2ll_ru(__a);
}
__DEVICE__ long long __double2ll_rz(double __a) {
return __nv_double2ll_rz(__a);
}
__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }
__DEVICE__ unsigned int __double2uint_rd(double __a) {
return __nv_double2uint_rd(__a);
}
__DEVICE__ unsigned int __double2uint_rn(double __a) {
return __nv_double2uint_rn(__a);
}
__DEVICE__ unsigned int __double2uint_ru(double __a) {
return __nv_double2uint_ru(__a);
}
__DEVICE__ unsigned int __double2uint_rz(double __a) {
return __nv_double2uint_rz(__a);
}
__DEVICE__ unsigned long long __double2ull_rd(double __a) {
return __nv_double2ull_rd(__a);
}
__DEVICE__ unsigned long long __double2ull_rn(double __a) {
return __nv_double2ull_rn(__a);
}
__DEVICE__ unsigned long long __double2ull_ru(double __a) {
return __nv_double2ull_ru(__a);
}
__DEVICE__ unsigned long long __double2ull_rz(double __a) {
return __nv_double2ull_rz(__a);
}
__DEVICE__ long long __double_as_longlong(double __a) {
return __nv_double_as_longlong(__a);
}
__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }
__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }
__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }
__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }
__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }
__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }
__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }
__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }
__DEVICE__ double __dsub_rd(double __a, double __b) {
return __nv_dsub_rd(__a, __b);
}
__DEVICE__ double __dsub_rn(double __a, double __b) {
return __nv_dsub_rn(__a, __b);
}
__DEVICE__ double __dsub_ru(double __a, double __b) {
return __nv_dsub_ru(__a, __b);
}
__DEVICE__ double __dsub_rz(double __a, double __b) {
return __nv_dsub_rz(__a, __b);
}
__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }
__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }
__DEVICE__ float __fAtomicAdd(float *__p, float __v) {
return __nvvm_atom_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {
return __nvvm_atom_cta_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {
return __nvvm_atom_sys_add_gen_f(__p, __v);
}
__DEVICE__ float __fAtomicExch(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {
return __nv_int_as_float(
__nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));
}
__DEVICE__ float __fadd_rd(float __a, float __b) {
return __nv_fadd_rd(__a, __b);
}
__DEVICE__ float __fadd_rn(float __a, float __b) {
return __nv_fadd_rn(__a, __b);
}
__DEVICE__ float __fadd_ru(float __a, float __b) {
return __nv_fadd_ru(__a, __b);
}
__DEVICE__ float __fadd_rz(float __a, float __b) {
return __nv_fadd_rz(__a, __b);
}
__DEVICE__ float __fdiv_rd(float __a, float __b) {
return __nv_fdiv_rd(__a, __b);
}
__DEVICE__ float __fdiv_rn(float __a, float __b) {
return __nv_fdiv_rn(__a, __b);
}
__DEVICE__ float __fdiv_ru(float __a, float __b) {
return __nv_fdiv_ru(__a, __b);
}
__DEVICE__ float __fdiv_rz(float __a, float __b) {
return __nv_fdiv_rz(__a, __b);
}
__DEVICE__ float __fdividef(float __a, float __b) {
return __nv_fast_fdividef(__a, __b);
}
__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }
__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }
__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }
#ifdef _MSC_VER
__DEVICE__ int __finitel(long double __a);
#endif
__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }
__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }
__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }
__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }
__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }
__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }
__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }
__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }
__DEVICE__ unsigned int __float2uint_rd(float __a) {
return __nv_float2uint_rd(__a);
}
__DEVICE__ unsigned int __float2uint_rn(float __a) {
return __nv_float2uint_rn(__a);
}
__DEVICE__ unsigned int __float2uint_ru(float __a) {
return __nv_float2uint_ru(__a);
}
__DEVICE__ unsigned int __float2uint_rz(float __a) {
return __nv_float2uint_rz(__a);
}
__DEVICE__ unsigned long long __float2ull_rd(float __a) {
return __nv_float2ull_rd(__a);
}
__DEVICE__ unsigned long long __float2ull_rn(float __a) {
return __nv_float2ull_rn(__a);
}
__DEVICE__ unsigned long long __float2ull_ru(float __a) {
return __nv_float2ull_ru(__a);
}
__DEVICE__ unsigned long long __float2ull_rz(float __a) {
return __nv_float2ull_rz(__a);
}
__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }
__DEVICE__ unsigned int __float_as_uint(float __a) {
return __nv_float_as_uint(__a);
}
__DEVICE__ double __fma_rd(double __a, double __b, double __c) {
return __nv_fma_rd(__a, __b, __c);
}
__DEVICE__ double __fma_rn(double __a, double __b, double __c) {
return __nv_fma_rn(__a, __b, __c);
}
__DEVICE__ double __fma_ru(double __a, double __b, double __c) {
return __nv_fma_ru(__a, __b, __c);
}
__DEVICE__ double __fma_rz(double __a, double __b, double __c) {
return __nv_fma_rz(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rd(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rn(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {
return __nv_fmaf_ieee_ru(__a, __b, __c);
}
__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {
return __nv_fmaf_ieee_rz(__a, __b, __c);
}
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {
return __nv_fmaf_rd(__a, __b, __c);
}
__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {
return __nv_fmaf_rn(__a, __b, __c);
}
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {
return __nv_fmaf_ru(__a, __b, __c);
}
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {
return __nv_fmaf_rz(__a, __b, __c);
}
__DEVICE__ float __fmul_rd(float __a, float __b) {
return __nv_fmul_rd(__a, __b);
}
__DEVICE__ float __fmul_rn(float __a, float __b) {
return __nv_fmul_rn(__a, __b);
}
__DEVICE__ float __fmul_ru(float __a, float __b) {
return __nv_fmul_ru(__a, __b);
}
__DEVICE__ float __fmul_rz(float __a, float __b) {
return __nv_fmul_rz(__a, __b);
}
__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }
__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }
__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }
__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }
__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }
__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }
__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }
__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }
__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }
__DEVICE__ float __fsub_rd(float __a, float __b) {
return __nv_fsub_rd(__a, __b);
}
__DEVICE__ float __fsub_rn(float __a, float __b) {
return __nv_fsub_rn(__a, __b);
}
__DEVICE__ float __fsub_ru(float __a, float __b) {
return __nv_fsub_ru(__a, __b);
}
__DEVICE__ float __fsub_rz(float __a, float __b) {
return __nv_fsub_rz(__a, __b);
}
__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }
__DEVICE__ double __hiloint2double(int __a, int __b) {
return __nv_hiloint2double(__a, __b);
}
__DEVICE__ int __iAtomicAdd(int *__p, int __v) {
return __nvvm_atom_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {
return __nvvm_atom_cta_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {
return __nvvm_atom_sys_add_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd(int *__p, int __v) {
return __nvvm_atom_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {
return __nvvm_atom_cta_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {
return __nvvm_atom_sys_and_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {
return __nvvm_atom_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {
return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {
return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);
}
__DEVICE__ int __iAtomicExch(int *__p, int __v) {
return __nvvm_atom_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {
return __nvvm_atom_cta_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {
return __nvvm_atom_sys_xchg_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax(int *__p, int __v) {
return __nvvm_atom_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {
return __nvvm_atom_cta_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {
return __nvvm_atom_sys_max_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin(int *__p, int __v) {
return __nvvm_atom_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {
return __nvvm_atom_cta_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {
return __nvvm_atom_sys_min_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr(int *__p, int __v) {
return __nvvm_atom_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {
return __nvvm_atom_cta_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {
return __nvvm_atom_sys_or_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor(int *__p, int __v) {
return __nvvm_atom_xor_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {
return __nvvm_atom_cta_xor_gen_i(__p, __v);
}
__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {
return __nvvm_atom_sys_xor_gen_i(__p, __v);
}
__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {
return __nvvm_atom_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {
return __nvvm_atom_cta_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {
return __nvvm_atom_sys_max_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {
return __nvvm_atom_min_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {
return __nvvm_atom_cta_min_gen_ll(__p, __v);
}
__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {
return __nvvm_atom_sys_min_gen_ll(__p, __v);
}
__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }
__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }
__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }
__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }
__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }
__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }
__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }
__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }
__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isinfl(long double __a);
#endif
__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }
__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }
#ifdef _MSC_VER
__DEVICE__ int __isnanl(long double __a);
#endif
__DEVICE__ double __ll2double_rd(long long __a) {
return __nv_ll2double_rd(__a);
}
__DEVICE__ double __ll2double_rn(long long __a) {
return __nv_ll2double_rn(__a);
}
__DEVICE__ double __ll2double_ru(long long __a) {
return __nv_ll2double_ru(__a);
}
__DEVICE__ double __ll2double_rz(long long __a) {
return __nv_ll2double_rz(__a);
}
__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }
__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }
__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }
__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }
__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {
return __nvvm_atom_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {
return __nvvm_atom_cta_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {
return __nvvm_atom_sys_and_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {
return __nvvm_atom_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {
return __nvvm_atom_cta_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {
return __nvvm_atom_sys_or_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {
return __nvvm_atom_xor_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {
return __nvvm_atom_cta_xor_gen_ll(__p, __v);
}
__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {
return __nvvm_atom_sys_xor_gen_ll(__p, __v);
}
__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }
__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }
__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }
__DEVICE__ double __longlong_as_double(long long __a) {
return __nv_longlong_as_double(__a);
}
__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }
__DEVICE__ long long __mul64hi(long long __a, long long __b) {
return __nv_mul64hi(__a, __b);
}
__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }
__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }
__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }
__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }
__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }
__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }
__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }
__DEVICE__ float __powf(float __a, float __b) {
return __nv_fast_powf(__a, __b);
}
// Parameter must have a known integer value.
-#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
+#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a))
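// Illustrative use: __prof_trigger(2) emits the PTX instruction "pmevent 2;";
// the argument must be a compile-time integer constant because of the "i"
// asm constraint.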
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
return __nv_sad(__a, __b, __c);
}
__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }
__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }
__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }
__DEVICE__ void __sincosf(float __a, float *__s, float *__c) {
return __nv_fast_sincosf(__a, __s, __c);
}
__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }
__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }
__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }
__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }
__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
-__DEVICE__ void __trap(void) { asm volatile("trap;"); }
+__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_add_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_and_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,
unsigned int __v) {
return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int
__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int
__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);
}
__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_dec_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_inc_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_max_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_min_gen_ui(__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_cta_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_or_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,
unsigned int __v) {
return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);
}
__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {
return __nv_uhadd(__a, __b);
}
__DEVICE__ double __uint2double_rn(unsigned int __a) {
return __nv_uint2double_rn(__a);
}
__DEVICE__ float __uint2float_rd(unsigned int __a) {
return __nv_uint2float_rd(__a);
}
__DEVICE__ float __uint2float_rn(unsigned int __a) {
return __nv_uint2float_rn(__a);
}
__DEVICE__ float __uint2float_ru(unsigned int __a) {
return __nv_uint2float_ru(__a);
}
__DEVICE__ float __uint2float_rz(unsigned int __a) {
return __nv_uint2float_rz(__a);
}
__DEVICE__ float __uint_as_float(unsigned int __a) {
return __nv_uint_as_float(__a);
}
__DEVICE__ double __ull2double_rd(unsigned long long __a) {
return __nv_ull2double_rd(__a);
}
__DEVICE__ double __ull2double_rn(unsigned long long __a) {
return __nv_ull2double_rn(__a);
}
__DEVICE__ double __ull2double_ru(unsigned long long __a) {
return __nv_ull2double_ru(__a);
}
__DEVICE__ double __ull2double_rz(unsigned long long __a) {
return __nv_ull2double_rz(__a);
}
__DEVICE__ float __ull2float_rd(unsigned long long __a) {
return __nv_ull2float_rd(__a);
}
__DEVICE__ float __ull2float_rn(unsigned long long __a) {
return __nv_ull2float_rn(__a);
}
__DEVICE__ float __ull2float_ru(unsigned long long __a) {
return __nv_ull2float_ru(__a);
}
__DEVICE__ float __ull2float_rz(unsigned long long __a) {
return __nv_ull2float_rz(__a);
}
__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,
unsigned long long __cmp,
unsigned long long __v) {
return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_max_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_min_gen_ull(__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,
unsigned long long __v) {
return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);
}
__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {
return __nv_umul24(__a, __b);
}
__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,
unsigned long long __b) {
return __nv_umul64hi(__a, __b);
}
__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {
return __nv_umulhi(__a, __b);
}
__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {
return __nv_urhadd(__a, __b);
}
__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,
unsigned int __c) {
return __nv_usad(__a, __b, __c);
}
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }
__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffs2(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffs4(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffu2(__a, __b);
}
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
return __nv_vabsdiffu4(__a, __b);
}
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
return __nv_vabsss2(__a);
}
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
return __nv_vabsss4(__a);
}
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
return __nv_vadd2(__a, __b);
}
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
return __nv_vadd4(__a, __b);
}
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
return __nv_vaddss2(__a, __b);
}
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
return __nv_vaddss4(__a, __b);
}
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
return __nv_vaddus2(__a, __b);
}
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
return __nv_vaddus4(__a, __b);
}
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
return __nv_vavgs2(__a, __b);
}
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
return __nv_vavgs4(__a, __b);
}
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
return __nv_vavgu2(__a, __b);
}
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
return __nv_vavgu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
return __nv_vcmpeq2(__a, __b);
}
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
return __nv_vcmpeq4(__a, __b);
}
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
return __nv_vcmpges2(__a, __b);
}
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
return __nv_vcmpges4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgeu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgeu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgts2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgts4(__a, __b);
}
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpgtu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpgtu4(__a, __b);
}
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
return __nv_vcmples2(__a, __b);
}
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
return __nv_vcmples4(__a, __b);
}
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpleu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpleu4(__a, __b);
}
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
return __nv_vcmplts2(__a, __b);
}
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
return __nv_vcmplts4(__a, __b);
}
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
return __nv_vcmpltu2(__a, __b);
}
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
return __nv_vcmpltu4(__a, __b);
}
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
return __nv_vcmpne2(__a, __b);
}
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
return __nv_vcmpne4(__a, __b);
}
__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
return __nv_vhaddu2(__a, __b);
}
__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
return __nv_vhaddu4(__a, __b);
}
__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
return __nv_vmaxs2(__a, __b);
}
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
return __nv_vmaxs4(__a, __b);
}
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
return __nv_vmaxu2(__a, __b);
}
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
return __nv_vmaxu4(__a, __b);
}
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
return __nv_vmins2(__a, __b);
}
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
return __nv_vmins4(__a, __b);
}
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
return __nv_vminu2(__a, __b);
}
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
return __nv_vminu4(__a, __b);
}
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
return __nv_vnegss2(__a);
}
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
return __nv_vnegss4(__a);
}
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
return __nv_vsads2(__a, __b);
}
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
return __nv_vsads4(__a, __b);
}
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
return __nv_vsadu2(__a, __b);
}
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
return __nv_vsadu4(__a, __b);
}
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
return __nv_vseteq2(__a, __b);
}
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
return __nv_vseteq4(__a, __b);
}
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
return __nv_vsetges2(__a, __b);
}
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
return __nv_vsetges4(__a, __b);
}
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
return __nv_vsetgeu2(__a, __b);
}
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
return __nv_vsetgeu4(__a, __b);
}
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
return __nv_vsetgts2(__a, __b);
}
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
return __nv_vsetgts4(__a, __b);
}
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
return __nv_vsetgtu2(__a, __b);
}
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
return __nv_vsetgtu4(__a, __b);
}
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
return __nv_vsetles2(__a, __b);
}
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
return __nv_vsetles4(__a, __b);
}
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
return __nv_vsetleu2(__a, __b);
}
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
return __nv_vsetleu4(__a, __b);
}
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
return __nv_vsetlts2(__a, __b);
}
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
return __nv_vsetlts4(__a, __b);
}
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
return __nv_vsetltu2(__a, __b);
}
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
return __nv_vsetltu4(__a, __b);
}
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
return __nv_vsetne2(__a, __b);
}
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
return __nv_vsetne4(__a, __b);
}
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
return __nv_vsub2(__a, __b);
}
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
return __nv_vsub4(__a, __b);
}
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
return __nv_vsubss2(__a, __b);
}
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
return __nv_vsubss4(__a, __b);
}
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
return __nv_vsubus2(__a, __b);
}
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
return __nv_vsubus4(__a, __b);
}
#else // CUDA_VERSION >= 9020
// CUDA no longer provides inline assembly (or bitcode) implementation of these
// functions, so we have to reimplement them. The implementation is naive and is
// not optimized for performance.
// Helper function to convert N-bit boolean subfields into all-0 or all-1.
// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00
// __bool2mask(0x00010000,16) -> 0xffff0000
__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {
return (__a << shift) - __a;
}
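// For illustration of the arithmetic above: each N-bit subfield holds either
// 0 or 1, and within a subfield (b << N) - b equals b * (2^N - 1), i.e. 0x00
// or 0xFF for N == 8.  E.g. for __a = 0x01000100 and shift = 8:
//   (__a << 8) - __a == 0x00010000 - 0x01000100 == 0xff00ff00 (mod 2^32).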
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vseteq2(__a, __b), 16);
}
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vseteq4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetges2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetges4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgeu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgeu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgts2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgts4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgtu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetgtu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetles2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetles4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetleu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetleu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetlts2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetlts4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetltu2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetltu4(__a, __b), 8);
}
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetne2(__a, __b), 16);
}
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
return __bool2mask(__vsetne4(__a, __b), 8);
}
// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086
// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>
// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)
// To operate on multiple sub-elements we need to make sure to mask out bits
// that crossed over into adjacent elements during the shift.
__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {
return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);
}
__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {
return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);
}
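// For illustration of the identity used above, with single bytes __a = 200
// (0xc8) and __b = 100 (0x64): __a & __b == 0x40 (64), __a ^ __b == 0xac, and
// 0xac >> 1 == 86, so 64 + 86 == 150 == (200 + 100) / 2.  The ~0x80008000u and
// ~0x80808080u masks clear, in each subfield, the top bit into which the
// neighbouring higher subfield's low bit would otherwise leak during the
// right shift.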
__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
unsigned int r;
if ((__a & 0x8000) && (__b & 0x8000)) {
// Work around a bug in ptxas which produces invalid result if low element
// is negative.
unsigned mask = __vcmpgts2(__a, __b);
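    // mask is all-ones in each halfword where __a > __b (signed), so the
    // blend on the next line selects __a in those halfwords and __b elsewhere.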
r = (__a & mask) | (__b & ~mask);
} else {
- asm("vmax2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
}
return r;
}
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
return __vsubss2(0, __a);
}
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
return __vsubss4(0, __a);
}
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
#endif // CUDA_VERSION >= 9020
// For OpenMP we require the user to include <time.h> as we need to know what
// clock_t is on the system.
#ifndef __OPENMP_NVPTX__
__DEVICE__ /* clock_t= */ int clock() { return __nvvm_read_ptx_sreg_clock(); }
#endif
__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }
// These functions shouldn't be declared when including this header
// for math function resolution purposes.
#ifndef __OPENMP_NVPTX__
__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {
return __builtin_memcpy(__a, __b, __c);
}
__DEVICE__ void *memset(void *__a, int __b, size_t __c) {
return __builtin_memset(__a, __b, __c);
}
#endif
#pragma pop_macro("__DEVICE__")
#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h b/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
index 7342705434e6..d488db0a94d9 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_hip_cmath.h
@@ -1,810 +1,842 @@
/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_HIP_CMATH_H__
#define __CLANG_HIP_CMATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <limits>
#include <type_traits>
#include <utility>
#endif
#include <limits.h>
#include <stdint.h>
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+#pragma push_macro("__CONSTEXPR__")
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#define __CONSTEXPR__
+#endif // __OPENMP_AMDGCN__
// Start with functions that cannot be defined by DEF macros below.
#if defined(__cplusplus)
-__DEVICE__ double abs(double __x) { return ::fabs(__x); }
-__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
-__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
-__DEVICE__ long abs(long __n) { return ::labs(__n); }
-__DEVICE__ float fma(float __x, float __y, float __z) {
+#if defined __OPENMP_AMDGCN__
+__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
+__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
+#endif
+__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
return ::fmaf(__x, __y, __z);
}
#if !defined(__HIPCC_RTC__)
// The value returned by fpclassify is platform-dependent; therefore it is not
// supported by hipRTC.
-__DEVICE__ int fpclassify(float __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
-__DEVICE__ int fpclassify(double __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
#endif // !defined(__HIPCC_RTC__)
-__DEVICE__ float frexp(float __arg, int *__exp) {
+__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
#if defined(__OPENMP_AMDGCN__)
// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
#pragma omp begin declare variant match( \
implementation = {extension(disable_implicit_base)})
// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
// add a suffix. This means we would clash with the names of the variants
// (note that we do not create implicit base functions here). To avoid
// this clash we add a new trait to some of them that is always true
// (this is LLVM after all ;)). It will only influence the mangled name
// of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
#if defined(__OPENMP_AMDGCN__)
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreaterequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isgreaterequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isless(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool isless(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool islessequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool islessgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isunordered(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ bool isunordered(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float pow(float __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
+ return ::modff(__x, __iptr);
+}
+__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
return ::powif(__base, __iexp);
}
-__DEVICE__ double pow(double __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
return ::powi(__base, __iexp);
}
-__DEVICE__ float remquo(float __x, float __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
return ::remquof(__x, __y, __quo);
}
-__DEVICE__ float scalbln(float __x, long int __n) {
+__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
return ::scalblnf(__x, __n);
}
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
// Notably missing above is nexttoward. We omit it because
// ocml doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.
// Other functions.
-__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
+__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
+ _Float16 __z) {
return __ocml_fma_f16(__x, __y, __z);
}
-__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
return __ocml_pown_f16(__base, __iexp);
}
+#ifndef __OPENMP_AMDGCN__
// BEGIN DEF_FUN and HIP_OVERLOAD
// BEGIN DEF_FUN
#pragma push_macro("__DEF_FUN1")
#pragma push_macro("__DEF_FUN2")
#pragma push_macro("__DEF_FUN2_FI")
// Define cmath functions with float argument and returns __retty.
#define __DEF_FUN1(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x) { return __func##f(__x); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
// Define cmath functions with two float arguments and returns __retty.
#define __DEF_FUN2(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, float __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \
+ return __func##f(__x, __y); \
+ }
// Define cmath functions with a float and an int argument and returns __retty.
#define __DEF_FUN2_FI(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, int __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \
+ return __func##f(__x, __y); \
+ }
__DEF_FUN1(float, acos)
__DEF_FUN1(float, acosh)
__DEF_FUN1(float, asin)
__DEF_FUN1(float, asinh)
__DEF_FUN1(float, atan)
__DEF_FUN2(float, atan2)
__DEF_FUN1(float, atanh)
__DEF_FUN1(float, cbrt)
__DEF_FUN1(float, ceil)
__DEF_FUN2(float, copysign)
__DEF_FUN1(float, cos)
__DEF_FUN1(float, cosh)
__DEF_FUN1(float, erf)
__DEF_FUN1(float, erfc)
__DEF_FUN1(float, exp)
__DEF_FUN1(float, exp2)
__DEF_FUN1(float, expm1)
__DEF_FUN1(float, fabs)
__DEF_FUN2(float, fdim)
__DEF_FUN1(float, floor)
__DEF_FUN2(float, fmax)
__DEF_FUN2(float, fmin)
__DEF_FUN2(float, fmod)
__DEF_FUN2(float, hypot)
__DEF_FUN1(int, ilogb)
__DEF_FUN2_FI(float, ldexp)
__DEF_FUN1(float, lgamma)
__DEF_FUN1(float, log)
__DEF_FUN1(float, log10)
__DEF_FUN1(float, log1p)
__DEF_FUN1(float, log2)
__DEF_FUN1(float, logb)
__DEF_FUN1(long long, llrint)
__DEF_FUN1(long long, llround)
__DEF_FUN1(long, lrint)
__DEF_FUN1(long, lround)
__DEF_FUN1(float, nearbyint)
__DEF_FUN2(float, nextafter)
__DEF_FUN2(float, pow)
__DEF_FUN2(float, remainder)
__DEF_FUN1(float, rint)
__DEF_FUN1(float, round)
__DEF_FUN2_FI(float, scalbn)
__DEF_FUN1(float, sin)
__DEF_FUN1(float, sinh)
__DEF_FUN1(float, sqrt)
__DEF_FUN1(float, tan)
__DEF_FUN1(float, tanh)
__DEF_FUN1(float, tgamma)
__DEF_FUN1(float, trunc)
#pragma pop_macro("__DEF_FUN1")
#pragma pop_macro("__DEF_FUN2")
#pragma pop_macro("__DEF_FUN2_FI")
// END DEF_FUN
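// For illustration, the __DEF_FUN2_FI(float, ldexp) invocation above produced
// (roughly)
//   __DEVICE__ __CONSTEXPR__ float ldexp(float __x, int __y) {
//     return ldexpf(__x, __y);
//   }
// i.e. each DEF macro forwards the float overload to the corresponding
// float-suffixed C function.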
// BEGIN HIP_OVERLOAD
#pragma push_macro("__HIP_OVERLOAD1")
#pragma push_macro("__HIP_OVERLOAD2")
// __hip_enable_if::type is a type function which returns __T if __B is true.
template <bool __B, class __T = void> struct __hip_enable_if {};
template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };
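// For illustration: __hip_enable_if<true, double>::type is double, while
// __hip_enable_if<false, double> has no ::type member, so any overload whose
// return type names it drops out of the candidate set (SFINAE).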
namespace __hip {
template <class _Tp> struct is_integral {
enum { value = 0 };
};
template <> struct is_integral<bool> {
enum { value = 1 };
};
template <> struct is_integral<char> {
enum { value = 1 };
};
template <> struct is_integral<signed char> {
enum { value = 1 };
};
template <> struct is_integral<unsigned char> {
enum { value = 1 };
};
template <> struct is_integral<wchar_t> {
enum { value = 1 };
};
template <> struct is_integral<short> {
enum { value = 1 };
};
template <> struct is_integral<unsigned short> {
enum { value = 1 };
};
template <> struct is_integral<int> {
enum { value = 1 };
};
template <> struct is_integral<unsigned int> {
enum { value = 1 };
};
template <> struct is_integral<long> {
enum { value = 1 };
};
template <> struct is_integral<unsigned long> {
enum { value = 1 };
};
template <> struct is_integral<long long> {
enum { value = 1 };
};
template <> struct is_integral<unsigned long long> {
enum { value = 1 };
};
// TODO: specialize is_arithmetic<_Float16>.
template <class _Tp> struct is_arithmetic {
enum { value = 0 };
};
template <> struct is_arithmetic<bool> {
enum { value = 1 };
};
template <> struct is_arithmetic<char> {
enum { value = 1 };
};
template <> struct is_arithmetic<signed char> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned char> {
enum { value = 1 };
};
template <> struct is_arithmetic<wchar_t> {
enum { value = 1 };
};
template <> struct is_arithmetic<short> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned short> {
enum { value = 1 };
};
template <> struct is_arithmetic<int> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned int> {
enum { value = 1 };
};
template <> struct is_arithmetic<long> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long> {
enum { value = 1 };
};
template <> struct is_arithmetic<long long> {
enum { value = 1 };
};
template <> struct is_arithmetic<unsigned long long> {
enum { value = 1 };
};
template <> struct is_arithmetic<float> {
enum { value = 1 };
};
template <> struct is_arithmetic<double> {
enum { value = 1 };
};
struct true_type {
static const __constant__ bool value = true;
};
struct false_type {
static const __constant__ bool value = false;
};
template <typename __T, typename __U> struct is_same : public false_type {};
template <typename __T> struct is_same<__T, __T> : public true_type {};
template <typename __T> struct add_rvalue_reference { typedef __T &&type; };
template <typename __T> typename add_rvalue_reference<__T>::type declval();
// decltype is only available in C++11 and above.
#if __cplusplus >= 201103L
// __hip_promote
template <class _Tp> struct __numeric_type {
static void __test(...);
static _Float16 __test(_Float16);
static float __test(float);
static double __test(char);
static double __test(int);
static double __test(unsigned);
static double __test(long);
static double __test(unsigned long);
static double __test(long long);
static double __test(unsigned long long);
static double __test(double);
// No support for long double, use double instead.
static double __test(long double);
typedef decltype(__test(declval<_Tp>())) type;
static const bool value = !is_same<type, void>::value;
};
template <> struct __numeric_type<void> { static const bool value = true; };
template <class _A1, class _A2 = void, class _A3 = void,
bool = __numeric_type<_A1>::value &&__numeric_type<_A2>::value
&&__numeric_type<_A3>::value>
class __promote_imp {
public:
static const bool value = false;
};
template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true> {
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
typedef typename __promote_imp<_A3>::type __type3;
public:
typedef decltype(__type1() + __type2() + __type3()) type;
static const bool value = true;
};
template <class _A1, class _A2> class __promote_imp<_A1, _A2, void, true> {
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
public:
typedef decltype(__type1() + __type2()) type;
static const bool value = true;
};
template <class _A1> class __promote_imp<_A1, void, void, true> {
public:
typedef typename __numeric_type<_A1>::type type;
static const bool value = true;
};
template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};
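// For illustration: __numeric_type maps integral types (and long double) to
// double and keeps _Float16, float and double as-is, so
// __promote<float, int>::type is decltype(float() + double()), i.e. double,
// while __promote<float, float>::type stays float.  This is what lets the
// mixed-type overloads below agree on a single common result type.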
#endif //__cplusplus >= 201103L
} // namespace __hip
// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to
// avoid compilation error due to ambiguity, e.g. floor(5) is resolved with
// floor(double).
#define __HIP_OVERLOAD1(__retty, __fn) \
template <typename __T> \
- __DEVICE__ \
+ __DEVICE__ __CONSTEXPR__ \
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
__fn(__T __x) { \
return ::__fn((double)__x); \
}
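// For illustration, __HIP_OVERLOAD1(double, floor) below provides (roughly)
//   template <typename __T>
//   __DEVICE__ __CONSTEXPR__
//       typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
//       floor(__T __x) { return ::floor((double)__x); }
// so a call like floor(5) selects this template instead of being ambiguous
// between the float and double overloads.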
// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double
// or integer argument to avoid compilation error due to ambiguity, e.g.
// max(5.0f, 6.0) is resolved with max(double, double).
#if __cplusplus >= 201103L
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if< \
+ __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
typename __hip::__promote<__T1, __T2>::type>::type \
__fn(__T1 __x, __T2 __y) { \
typedef typename __hip::__promote<__T1, __T2>::type __result_type; \
return __fn((__result_type)__x, (__result_type)__y); \
}
#else
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
- __hip::is_arithmetic<__T2>::value, \
- __retty>::type \
- __fn(__T1 __x, __T2 __y) { \
+ __DEVICE__ __CONSTEXPR__ \
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
+ __hip::is_arithmetic<__T2>::value, \
+ __retty>::type \
+ __fn(__T1 __x, __T2 __y) { \
return __fn((double)__x, (double)__y); \
}
#endif
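// For illustration, with C++11 enabled the __HIP_OVERLOAD2(double, fmax)
// instantiation below accepts any mix of arithmetic argument types: a call
// such as fmax(5.0f, 6) deduces __hip::__promote<float, int>::type == double
// and forwards to fmax(double, double), avoiding an ambiguous overload.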
__HIP_OVERLOAD1(double, acos)
__HIP_OVERLOAD1(double, acosh)
__HIP_OVERLOAD1(double, asin)
__HIP_OVERLOAD1(double, asinh)
__HIP_OVERLOAD1(double, atan)
__HIP_OVERLOAD2(double, atan2)
__HIP_OVERLOAD1(double, atanh)
__HIP_OVERLOAD1(double, cbrt)
__HIP_OVERLOAD1(double, ceil)
__HIP_OVERLOAD2(double, copysign)
__HIP_OVERLOAD1(double, cos)
__HIP_OVERLOAD1(double, cosh)
__HIP_OVERLOAD1(double, erf)
__HIP_OVERLOAD1(double, erfc)
__HIP_OVERLOAD1(double, exp)
__HIP_OVERLOAD1(double, exp2)
__HIP_OVERLOAD1(double, expm1)
__HIP_OVERLOAD1(double, fabs)
__HIP_OVERLOAD2(double, fdim)
__HIP_OVERLOAD1(double, floor)
__HIP_OVERLOAD2(double, fmax)
__HIP_OVERLOAD2(double, fmin)
__HIP_OVERLOAD2(double, fmod)
#if !defined(__HIPCC_RTC__)
__HIP_OVERLOAD1(int, fpclassify)
#endif // !defined(__HIPCC_RTC__)
__HIP_OVERLOAD2(double, hypot)
__HIP_OVERLOAD1(int, ilogb)
__HIP_OVERLOAD1(bool, isfinite)
__HIP_OVERLOAD2(bool, isgreater)
__HIP_OVERLOAD2(bool, isgreaterequal)
__HIP_OVERLOAD1(bool, isinf)
__HIP_OVERLOAD2(bool, isless)
__HIP_OVERLOAD2(bool, islessequal)
__HIP_OVERLOAD2(bool, islessgreater)
__HIP_OVERLOAD1(bool, isnan)
__HIP_OVERLOAD1(bool, isnormal)
__HIP_OVERLOAD2(bool, isunordered)
__HIP_OVERLOAD1(double, lgamma)
__HIP_OVERLOAD1(double, log)
__HIP_OVERLOAD1(double, log10)
__HIP_OVERLOAD1(double, log1p)
__HIP_OVERLOAD1(double, log2)
__HIP_OVERLOAD1(double, logb)
__HIP_OVERLOAD1(long long, llrint)
__HIP_OVERLOAD1(long long, llround)
__HIP_OVERLOAD1(long, lrint)
__HIP_OVERLOAD1(long, lround)
__HIP_OVERLOAD1(double, nearbyint)
__HIP_OVERLOAD2(double, nextafter)
__HIP_OVERLOAD2(double, pow)
__HIP_OVERLOAD2(double, remainder)
__HIP_OVERLOAD1(double, rint)
__HIP_OVERLOAD1(double, round)
__HIP_OVERLOAD1(bool, signbit)
__HIP_OVERLOAD1(double, sin)
__HIP_OVERLOAD1(double, sinh)
__HIP_OVERLOAD1(double, sqrt)
__HIP_OVERLOAD1(double, tan)
__HIP_OVERLOAD1(double, tanh)
__HIP_OVERLOAD1(double, tgamma)
__HIP_OVERLOAD1(double, trunc)
// Overload these but don't add them to std; they are not part of cmath.
__HIP_OVERLOAD2(double, max)
__HIP_OVERLOAD2(double, min)
// Additional Overloads that don't quite match HIP_OVERLOAD.
#if __cplusplus >= 201103L
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<
+__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
__hip::is_arithmetic<__T3>::value,
typename __hip::__promote<__T1, __T2, __T3>::type>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type;
return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z);
}
#else
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value &&
- __hip::is_arithmetic<__T3>::value,
- double>::type
-fma(__T1 __x, __T2 __y, __T3 __z) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value &&
+ __hip::is_arithmetic<__T3>::value,
+ double>::type
+ fma(__T1 __x, __T2 __y, __T3 __z) {
return ::fma((double)__x, (double)__y, (double)__z);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
frexp(__T __x, int *__exp) {
return ::frexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
ldexp(__T __x, int __exp) {
return ::ldexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
modf(__T __x, double *__exp) {
return ::modf((double)__x, __exp);
}
#if __cplusplus >= 201103L
template <typename __T1, typename __T2>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
__hip::is_arithmetic<__T2>::value,
typename __hip::__promote<__T1, __T2>::type>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
typedef typename __hip::__promote<__T1, __T2>::type __result_type;
return ::remquo((__result_type)__x, (__result_type)__y, __quo);
}
#else
template <typename __T1, typename __T2>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value,
- double>::type
-remquo(__T1 __x, __T2 __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value,
+ double>::type
+ remquo(__T1 __x, __T2 __y, int *__quo) {
return ::remquo((double)__x, (double)__y, __quo);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbln(__T __x, long int __exp) {
return ::scalbln((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbn(__T __x, int __exp) {
return ::scalbn((double)__x, __exp);
}
#pragma pop_macro("__HIP_OVERLOAD1")
#pragma pop_macro("__HIP_OVERLOAD2")
// END HIP_OVERLOAD
// END DEF_FUN and HIP_OVERLOAD
+#endif // ifndef __OPENMP_AMDGCN__
#endif // defined(__cplusplus)
+#ifndef __OPENMP_AMDGCN__
// Define these overloads inside the namespace our standard library uses.
#if !defined(__HIPCC_RTC__)
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif // _LIBCPP_BEGIN_NAMESPACE_STD
// Pull the new overloads we defined above into namespace std.
// using ::abs; - This may be considered for C++.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::modf;
// using ::nan; - This may be considered for C++.
// using ::nanf; - This may be considered for C++.
// using ::nanl; - This is not yet defined.
using ::nearbyint;
using ::nextafter;
// using ::nexttoward; - Omit this since we do not have a definition.
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;
// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif
// Finally, pull the "foobarf" functions that HIP defines into std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
// using ::nexttowardf; - Omit this since we do not have a definition.
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif // _GLIBCXX_BEGIN_NAMESPACE_VERSION
} // namespace std
#endif // _LIBCPP_END_NAMESPACE_STD
#endif // !defined(__HIPCC_RTC__)
// Define device-side math functions from <ymath.h> on MSVC.
#if !defined(__HIPCC_RTC__)
#if defined(_MSC_VER)
// Before VS2019, `<ymath.h>` is also included in `<limits>` and other headers.
// But, from VS2019, it's only included in `<complex>`. Need to include
// `<ymath.h>` here to ensure C functions declared there won't be marked as
// `__host__` and `__device__` through `<complex>` wrapper.
#include <ymath.h>
#if defined(__cplusplus)
extern "C" {
#endif // defined(__cplusplus)
-__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
+ double y) {
return cosh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
+ float y) {
return coshf(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
+ double y) {
return sinh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
+ float y) {
return sinhf(x) * y;
}
#if defined(__cplusplus)
}
#endif // defined(__cplusplus)
#endif // defined(_MSC_VER)
#endif // !defined(__HIPCC_RTC__)
+#endif // ifndef __OPENMP_AMDGCN__
#pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__CONSTEXPR__")
#endif // __CLANG_HIP_CMATH_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
index 1f0982d92eff..ef7e087b832c 100644
--- a/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
+++ b/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h
@@ -1,1279 +1,1321 @@
/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
-#endif // __HIPCC_RTC__
+#ifdef __OPENMP_AMDGCN__
+#include <omp.h>
+#endif
+#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
+#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template<bool>
struct __compare_result{};
template<>
struct __compare_result<true> {
static const __device__ bool valid;
};
__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__ void __static_assert_equal_size() {
__suppress_unused_warning(__compare_result<S == T>::valid);
}
#define __static_assert_type_size_equal(A, B) \
__static_assert_equal_size<A,B>()
#else
#define __static_assert_type_size_equal(A,B) \
static_assert((A) == (B), "")
#endif
__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '7')
__r = (__r * 8u) + __tmp - '0';
else
return 0;
++__tagp;
}
return __r;
}
__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '9')
__r = (__r * 10u) + __tmp - '0';
else
return 0;
++__tagp;
}
return __r;
}
__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp) {
uint64_t __r = 0;
while (__tagp) {
char __tmp = *__tagp;
if (__tmp >= '0' && __tmp <= '9')
__r = (__r * 16u) + __tmp - '0';
else if (__tmp >= 'a' && __tmp <= 'f')
__r = (__r * 16u) + __tmp - 'a' + 10;
else if (__tmp >= 'A' && __tmp <= 'F')
__r = (__r * 16u) + __tmp - 'A' + 10;
else
return 0;
++__tagp;
}
return __r;
}
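// __make_mantissa picks the base from the tag prefix, following the usual
// nan(tagp) convention: "0x"/"0X" selects base 16, a bare leading '0' selects
// base 8, and anything else is parsed as base 10.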
__DEVICE__
uint64_t __make_mantissa(const char *__tagp) {
if (!__tagp)
return 0u;
if (*__tagp == '0') {
++__tagp;
if (*__tagp == 'x' || *__tagp == 'X')
return __make_mantissa_base16(__tagp);
else
return __make_mantissa_base8(__tagp);
}
return __make_mantissa_base10(__tagp);
}
// BEGIN FLOAT
#if defined(__cplusplus)
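// abs/labs/llabs below use a branchless idiom: __sgn is 0 for non-negative
// inputs and all ones for negative ones (arithmetic shift of the sign bit),
// so (__x ^ __sgn) - __sgn yields __x or -__x respectively.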
__DEVICE__
int abs(int __x) {
int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
return (__x ^ __sgn) - __sgn;
}
#endif
__DEVICE__
float acosf(float __x) { return __ocml_acos_f32(__x); }
__DEVICE__
float acoshf(float __x) { return __ocml_acosh_f32(__x); }
__DEVICE__
float asinf(float __x) { return __ocml_asin_f32(__x); }
__DEVICE__
float asinhf(float __x) { return __ocml_asinh_f32(__x); }
__DEVICE__
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
__DEVICE__
float atanf(float __x) { return __ocml_atan_f32(__x); }
__DEVICE__
float atanhf(float __x) { return __ocml_atanh_f32(__x); }
__DEVICE__
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
__DEVICE__
float ceilf(float __x) { return __ocml_ceil_f32(__x); }
__DEVICE__
float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }
__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }
__DEVICE__
float cospif(float __x) { return __ocml_cospi_f32(__x); }
__DEVICE__
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
__DEVICE__
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
__DEVICE__
float erfcf(float __x) { return __ocml_erfc_f32(__x); }
__DEVICE__
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
__DEVICE__
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
__DEVICE__
float erff(float __x) { return __ocml_erf_f32(__x); }
__DEVICE__
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
__DEVICE__
float exp10f(float __x) { return __ocml_exp10_f32(__x); }
__DEVICE__
float exp2f(float __x) { return __ocml_exp2_f32(__x); }
__DEVICE__
float expf(float __x) { return __ocml_exp_f32(__x); }
__DEVICE__
float expm1f(float __x) { return __ocml_expm1_f32(__x); }
__DEVICE__
float fabsf(float __x) { return __ocml_fabs_f32(__x); }
__DEVICE__
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
__DEVICE__
float fdividef(float __x, float __y) { return __x / __y; }
__DEVICE__
float floorf(float __x) { return __ocml_floor_f32(__x); }
__DEVICE__
float fmaf(float __x, float __y, float __z) {
return __ocml_fma_f32(__x, __y, __z);
}
__DEVICE__
float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
__DEVICE__
float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
__DEVICE__
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
__DEVICE__
float frexpf(float __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
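// The allocate pragma keeps __tmp in thread-private memory under OpenMP, so
// the address_space(5) (private) cast below remains valid on amdgcn.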
float __r =
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
return __r;
}
__DEVICE__
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
__DEVICE__
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
__DEVICE__
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
__DEVICE__
__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
__DEVICE__
__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
__DEVICE__
float j0f(float __x) { return __ocml_j0_f32(__x); }
__DEVICE__
float j1f(float __x) { return __ocml_j1_f32(__x); }
__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case.
if (__n == 0)
return j0f(__x);
if (__n == 1)
return j1f(__x);
float __x0 = j0f(__x);
float __x1 = j1f(__x);
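// Forward recurrence: J_{n+1}(x) = (2n/x) * J_n(x) - J_{n-1}(x).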
for (int __i = 1; __i < __n; ++__i) {
float __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
__DEVICE__
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
__DEVICE__
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
__DEVICE__
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
long long int llroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float log10f(float __x) { return __ocml_log10_f32(__x); }
__DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__
float log2f(float __x) { return __ocml_log2_f32(__x); }
__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__
float logf(float __x) { return __ocml_log_f32(__x); }
__DEVICE__
long int lrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
long int lroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float modff(float __x, float *__iptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r =
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__iptr = __tmp;
return __r;
}
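// nanf builds a quiet NaN by hand: all-ones exponent plus the quiet bit, with
// the low mantissa bits taken from __make_mantissa(__tagp).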
__DEVICE__
float nanf(const char *__tagp) {
union {
float val;
struct ieee_float {
unsigned int mantissa : 22;
unsigned int quiet : 1;
unsigned int exponent : 8;
unsigned int sign : 1;
} bits;
} __tmp;
__static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
__tmp.bits.sign = 0u;
__tmp.bits.exponent = ~0u;
__tmp.bits.quiet = 1u;
__tmp.bits.mantissa = __make_mantissa(__tagp);
return __tmp.val;
}
__DEVICE__
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
__DEVICE__
float nextafterf(float __x, float __y) {
return __ocml_nextafter_f32(__x, __y);
}
__DEVICE__
float norm3df(float __x, float __y, float __z) {
return __ocml_len3_f32(__x, __y, __z);
}
__DEVICE__
float norm4df(float __x, float __y, float __z, float __w) {
return __ocml_len4_f32(__x, __y, __z, __w);
}
__DEVICE__
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
__DEVICE__
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
__DEVICE__
float normf(int __dim,
const float *__a) { // TODO: placeholder until OCML adds support.
float __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_sqrt_f32(__r);
}
__DEVICE__
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
__DEVICE__
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
__DEVICE__
float remainderf(float __x, float __y) {
return __ocml_remainder_f32(__x, __y);
}
__DEVICE__
float remquof(float __x, float __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r = __ocml_remquo_f32(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
return __r;
}
__DEVICE__
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
__DEVICE__
float rintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__
float rnorm3df(float __x, float __y, float __z) {
return __ocml_rlen3_f32(__x, __y, __z);
}
__DEVICE__
float rnorm4df(float __x, float __y, float __z, float __w) {
return __ocml_rlen4_f32(__x, __y, __z, __w);
}
__DEVICE__
float rnormf(int __dim,
const float *__a) { // TODO: placeholder until OCML adds support.
float __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_rsqrt_f32(__r);
}
__DEVICE__
float roundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
__DEVICE__
float scalblnf(float __x, long int __n) {
return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
: __ocml_scalb_f32(__x, __n);
}
__DEVICE__
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
__DEVICE__
__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f32(
__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }
__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }
__DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }
__DEVICE__
float tanhf(float __x) { return __ocml_tanh_f32(__x); }
__DEVICE__
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
__DEVICE__
float truncf(float __x) { return __ocml_trunc_f32(__x); }
__DEVICE__
float y0f(float __x) { return __ocml_y0_f32(__x); }
__DEVICE__
float y1f(float __x) { return __ocml_y1_f32(__x); }
__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return y0f(__x);
if (__n == 1)
return y1f(__x);
float __x0 = y0f(__x);
float __x1 = y1f(__x);
for (int __i = 1; __i < __n; ++__i) {
float __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
// BEGIN INTRINSICS
__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif
__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
return __ocml_fma_f32(__x, __y, __z);
}
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif
__DEVICE__
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif
__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }
__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
*__sinptr = __ocml_native_sin_f32(__x);
*__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__DEVICE__
double acos(double __x) { return __ocml_acos_f64(__x); }
__DEVICE__
double acosh(double __x) { return __ocml_acosh_f64(__x); }
__DEVICE__
double asin(double __x) { return __ocml_asin_f64(__x); }
__DEVICE__
double asinh(double __x) { return __ocml_asinh_f64(__x); }
__DEVICE__
double atan(double __x) { return __ocml_atan_f64(__x); }
__DEVICE__
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
__DEVICE__
double atanh(double __x) { return __ocml_atanh_f64(__x); }
__DEVICE__
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
__DEVICE__
double ceil(double __x) { return __ocml_ceil_f64(__x); }
__DEVICE__
double copysign(double __x, double __y) {
return __ocml_copysign_f64(__x, __y);
}
__DEVICE__
double cos(double __x) { return __ocml_cos_f64(__x); }
__DEVICE__
double cosh(double __x) { return __ocml_cosh_f64(__x); }
__DEVICE__
double cospi(double __x) { return __ocml_cospi_f64(__x); }
__DEVICE__
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
__DEVICE__
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
__DEVICE__
double erf(double __x) { return __ocml_erf_f64(__x); }
__DEVICE__
double erfc(double __x) { return __ocml_erfc_f64(__x); }
__DEVICE__
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
__DEVICE__
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
__DEVICE__
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
__DEVICE__
double exp(double __x) { return __ocml_exp_f64(__x); }
__DEVICE__
double exp10(double __x) { return __ocml_exp10_f64(__x); }
__DEVICE__
double exp2(double __x) { return __ocml_exp2_f64(__x); }
__DEVICE__
double expm1(double __x) { return __ocml_expm1_f64(__x); }
__DEVICE__
double fabs(double __x) { return __ocml_fabs_f64(__x); }
__DEVICE__
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
__DEVICE__
double floor(double __x) { return __ocml_floor_f64(__x); }
__DEVICE__
double fma(double __x, double __y, double __z) {
return __ocml_fma_f64(__x, __y, __z);
}
__DEVICE__
double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
__DEVICE__
double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
__DEVICE__
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
__DEVICE__
double frexp(double __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
return __r;
}
__DEVICE__
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
__DEVICE__
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
__DEVICE__
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
__DEVICE__
__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
__DEVICE__
__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
__DEVICE__
double j0(double __x) { return __ocml_j0_f64(__x); }
__DEVICE__
double j1(double __x) { return __ocml_j1_f64(__x); }
__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return j0(__x);
if (__n == 1)
return j1(__x);
double __x0 = j0(__x);
double __x1 = j1(__x);
for (int __i = 1; __i < __n; ++__i) {
double __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
__DEVICE__
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
__DEVICE__
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
__DEVICE__
long long int llrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
long long int llround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double log(double __x) { return __ocml_log_f64(__x); }
__DEVICE__
double log10(double __x) { return __ocml_log10_f64(__x); }
__DEVICE__
double log1p(double __x) { return __ocml_log1p_f64(__x); }
__DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }
__DEVICE__
double logb(double __x) { return __ocml_logb_f64(__x); }
__DEVICE__
long int lrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
long int lround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double modf(double __x, double *__iptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
*__iptr = __tmp;
return __r;
}
__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
union {
double val;
struct ieee_double {
uint64_t mantissa : 51;
uint32_t quiet : 1;
uint32_t exponent : 11;
uint32_t sign : 1;
} bits;
} __tmp;
__static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
__tmp.bits.sign = 0u;
__tmp.bits.exponent = ~0u;
__tmp.bits.quiet = 1u;
__tmp.bits.mantissa = __make_mantissa(__tagp);
return __tmp.val;
#else
__static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
uint64_t __val = __make_mantissa(__tagp);
__val |= 0xFFF << 51;
return *reinterpret_cast<double *>(&__val);
#endif
}
__DEVICE__
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
__DEVICE__
double nextafter(double __x, double __y) {
return __ocml_nextafter_f64(__x, __y);
}
__DEVICE__
double norm(int __dim,
const double *__a) { // TODO: placeholder until OCML adds support.
double __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_sqrt_f64(__r);
}
__DEVICE__
double norm3d(double __x, double __y, double __z) {
return __ocml_len3_f64(__x, __y, __z);
}
__DEVICE__
double norm4d(double __x, double __y, double __z, double __w) {
return __ocml_len4_f64(__x, __y, __z, __w);
}
__DEVICE__
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
__DEVICE__
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
__DEVICE__
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
__DEVICE__
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
__DEVICE__
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
__DEVICE__
double remainder(double __x, double __y) {
return __ocml_remainder_f64(__x, __y);
}
__DEVICE__
double remquo(double __x, double __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r = __ocml_remquo_f64(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
return __r;
}
__DEVICE__
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
__DEVICE__
double rint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
double rnorm(int __dim,
const double *__a) { // TODO: placeholder until OCML adds support.
double __r = 0;
while (__dim--) {
__r += __a[0] * __a[0];
++__a;
}
return __ocml_rsqrt_f64(__r);
}
__DEVICE__
double rnorm3d(double __x, double __y, double __z) {
return __ocml_rlen3_f64(__x, __y, __z);
}
__DEVICE__
double rnorm4d(double __x, double __y, double __z, double __w) {
return __ocml_rlen4_f64(__x, __y, __z, __w);
}
__DEVICE__
double round(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
__DEVICE__
double scalbln(double __x, long int __n) {
return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
: __ocml_scalb_f64(__x, __n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
__DEVICE__
__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
__DEVICE__
double sin(double __x) { return __ocml_sin_f64(__x); }
__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincos_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
}
__DEVICE__
double sinh(double __x) { return __ocml_sinh_f64(__x); }
__DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }
__DEVICE__
double tanh(double __x) { return __ocml_tanh_f64(__x); }
__DEVICE__
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
__DEVICE__
double trunc(double __x) { return __ocml_trunc_f64(__x); }
__DEVICE__
double y0(double __x) { return __ocml_y0_f64(__x); }
__DEVICE__
double y1(double __x) { return __ocml_y1_f64(__x); }
__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
// and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if
// it'd be beneficial in this case. Placeholder until OCML adds
// support.
if (__n == 0)
return y0(__x);
if (__n == 1)
return y1(__x);
double __x0 = y0(__x);
double __x1 = y1(__x);
for (int __i = 1; __i < __n; ++__i) {
double __x2 = (2 * __i) / __x * __x1 - __x0;
__x0 = __x1;
__x1 = __x2;
}
return __x1;
}
// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dadd_rd(double __x, double __y) {
return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__
double __dadd_rn(double __x, double __y) {
return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__
double __dadd_ru(double __x, double __y) {
return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__
double __dadd_rz(double __x, double __y) {
return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __ddiv_rd(double __x, double __y) {
return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__
double __ddiv_rn(double __x, double __y) {
return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__
double __ddiv_ru(double __x, double __y) {
return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__
double __ddiv_rz(double __x, double __y) {
return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dmul_rd(double __x, double __y) {
return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__
double __dmul_rn(double __x, double __y) {
return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__
double __dmul_ru(double __x, double __y) {
return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__
double __dmul_rz(double __x, double __y) {
return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__
double __drcp_rn(double __x) { return 1.0 / __x; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsub_rd(double __x, double __y) {
return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__
double __dsub_rn(double __x, double __y) {
return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__
double __dsub_ru(double __x, double __y) {
return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__
double __dsub_rz(double __x, double __y) {
return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __fma_rd(double __x, double __y, double __z) {
return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__
double __fma_ru(double __x, double __y, double __z) {
return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rz(double __x, double __y, double __z) {
return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
return __ocml_fma_f64(__x, __y, __z);
}
#endif
// END INTRINSICS
// END DOUBLE
// C only macros
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
_Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
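// For illustration, in C the _Generic macros above dispatch on argument type:
// isnan(1.0f) expands to __isnanf(1.0f), while isnan(1.0) expands to
// __isnan(1.0).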
#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
return (__arg1 < __arg2) ? __arg1 : __arg2;
}
template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
return (__arg1 > __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int min(int __arg1, int __arg2) {
return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int max(int __arg1, int __arg2) {
return (__arg1 > __arg2) ? __arg1 : __arg2;
}
__DEVICE__
float max(float __x, float __y) { return fmaxf(__x, __y); }
__DEVICE__
double max(double __x, double __y) { return fmax(__x, __y); }
__DEVICE__
float min(float __x, float __y) { return fminf(__x, __y); }
__DEVICE__
double min(double __x, double __y) { return fmin(__x, __y); }
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return std::min(__arg1, __arg2);
}
__host__ inline static int max(int __arg1, int __arg2) {
return std::max(__arg1, __arg2);
}
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")
#endif // __CLANG_HIP_MATH_H__
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 953857badfc4..279fb26fbaf7 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -1,84 +1,106 @@
/*===- __clang_openmp_device_functions.h - OpenMP device function declares -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
-#pragma omp begin declare variant match( \
- device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
#ifdef __cplusplus
extern "C" {
#endif
+#pragma omp begin declare variant match( \
+ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
#define __CUDA__
#define __OPENMP_NVPTX__
/// Include declarations for libdevice functions.
#include <__clang_cuda_libdevice_declares.h>
/// Provide definitions for these functions.
#include <__clang_cuda_device_functions.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
#endif
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
#pragma omp end declare variant
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
// need to `include <new>` in C++ mode.
#ifdef __cplusplus
// We require malloc/free.
#include <cstdlib>
#pragma push_macro("OPENMP_NOEXCEPT")
#if __cplusplus >= 201103L
#define OPENMP_NOEXCEPT noexcept
#else
#define OPENMP_NOEXCEPT
#endif
// Device overrides for non-placement new and delete.
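// operator new must return a distinct, non-null pointer even for zero-byte
// requests, hence the bump of size to 1 below.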
inline void *operator new(__SIZE_TYPE__ size) {
if (size == 0)
size = 1;
return ::malloc(size);
}
inline void *operator new[](__SIZE_TYPE__ size) { return ::operator new(size); }
inline void operator delete(void *ptr)OPENMP_NOEXCEPT { ::free(ptr); }
inline void operator delete[](void *ptr) OPENMP_NOEXCEPT {
::operator delete(ptr);
}
// Sized delete, C++14 only.
#if __cplusplus >= 201402L
inline void operator delete(void *ptr, __SIZE_TYPE__ size)OPENMP_NOEXCEPT {
::operator delete(ptr);
}
inline void operator delete[](void *ptr, __SIZE_TYPE__ size) OPENMP_NOEXCEPT {
::operator delete(ptr);
}
#endif
#pragma pop_macro("OPENMP_NOEXCEPT")
#endif
#endif
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
index 1aff66af7d52..22a720aca956 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/cmath
@@ -1,78 +1,132 @@
/*===-- __clang_openmp_device_functions.h - OpenMP math declares ------ c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_OPENMP_CMATH_H__
#define __CLANG_OPENMP_CMATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <cmath>
// Make sure we include our math.h overlay; it probably happened already but we
// need to be sure.
#include <math.h>
// We (might) need cstdlib because __clang_cuda_cmath.h below declares `abs`
// which might live in cstdlib.
#include <cstdlib>
// We need limits because __clang_cuda_cmath.h below uses `std::numeric_limits`.
#include <limits>
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)})
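// Declarations between this pragma and the matching "end declare variant"
// are only considered when the device selector matches, i.e. when compiling
// for an nvptx target here.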
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_cmath.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
// Overloads not provided by the CUDA wrappers but by the CUDA system headers.
// Since we do not include the latter we define them ourselves.
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
__DEVICE__ float erf(float __x) { return ::erff(__x); }
__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
__DEVICE__ long long int llrint(float __x) { return ::llrintf(__x); }
__DEVICE__ long long int llround(float __x) { return ::llroundf(__x); }
__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
__DEVICE__ float log2(float __x) { return ::log2f(__x); }
__DEVICE__ float logb(float __x) { return ::logbf(__x); }
__DEVICE__ long int lrint(float __x) { return ::lrintf(__x); }
__DEVICE__ long int lround(float __x) { return ::lroundf(__x); }
__DEVICE__ float nextafter(float __x, float __y) {
return ::nextafterf(__x, __y);
}
__DEVICE__ float remainder(float __x, float __y) {
return ::remainderf(__x, __y);
}
__DEVICE__ float scalbln(float __x, long int __y) {
return ::scalblnf(__x, __y);
}
__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
#undef __DEVICE__
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+// Define overloads that are otherwise absent.
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
+__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float erf(float __x) { return ::erff(__x); }
+__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
+__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
+__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
+__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
+__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
+__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
+__DEVICE__ float ldexp(float __arg, int __exp) {
+ return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
+__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
+__DEVICE__ float logb(float __x) { return ::logbf(__x); }
+__DEVICE__ float nextafter(float __x, float __y) {
+ return ::nextafterf(__x, __y);
+}
+__DEVICE__ float remainder(float __x, float __y) {
+ return ::remainderf(__x, __y);
+}
+__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
+
+#undef __DEVICE__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
#endif
diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
index c64af8b13ece..1e3c07cfdb8c 100644
--- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
+++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/math.h
@@ -1,51 +1,61 @@
/*===---- openmp_wrapper/math.h -------- OpenMP math.h intercept ------ c++ -===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
// If we are in C++ mode and include <math.h> (not <cmath>) first, we still need
// to make sure <cmath> is read first. The problem otherwise is that we haven't
// seen the declarations of the math.h functions when the system math.h includes
// our cmath overlay. However, our cmath overlay, or better the underlying
// overlay, e.g. CUDA, uses the math.h functions. Since we haven't declared them
// yet we get errors. CUDA avoids this by eagerly declaring all math functions
// (in the __device__ space) but we cannot do this. Instead we break the
// dependence by forcing cmath to go first. While our cmath will in turn include
// this file, the cmath guards will prevent recursion.
#ifdef __cplusplus
#include <cmath>
#endif
#ifndef __CLANG_OPENMP_MATH_H__
#define __CLANG_OPENMP_MATH_H__
#ifndef _OPENMP
#error "This file is for OpenMP compilation only."
#endif
#include_next <math.h>
// We need limits.h for __clang_cuda_math.h below and, because it should not hurt,
// we include it eagerly here.
#include <limits.h>
// We need stdlib.h because (for now) __clang_cuda_math.h below declares `abs`
// which should live in stdlib.h.
#include <stdlib.h>
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
#define __CUDA__
#define __OPENMP_NVPTX__
#include <__clang_cuda_math.h>
#undef __OPENMP_NVPTX__
#undef __CUDA__
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+#endif
+
#endif
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index be4c51930789..25f134868758 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1,6367 +1,6367 @@
//===--- SemaTemplateInstantiateDecl.cpp - C++ Template Decl Instantiation ===/
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
//
// This file implements C++ template instantiation for declarations.
//
//===----------------------------------------------------------------------===/
#include "TreeTransform.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/DependentDiagnostic.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/PrettyDeclStackTrace.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "clang/Sema/TemplateInstCallback.h"
#include "llvm/Support/TimeProfiler.h"
using namespace clang;
static bool isDeclWithinFunction(const Decl *D) {
const DeclContext *DC = D->getDeclContext();
if (DC->isFunctionOrMethod())
return true;
if (DC->isRecord())
return cast<CXXRecordDecl>(DC)->isLocalClass();
return false;
}
template<typename DeclT>
static bool SubstQualifier(Sema &SemaRef, const DeclT *OldDecl, DeclT *NewDecl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (!OldDecl->getQualifierLoc())
return false;
assert((NewDecl->getFriendObjectKind() ||
!OldDecl->getLexicalDeclContext()->isDependentContext()) &&
"non-friend with qualified name defined in dependent context");
Sema::ContextRAII SavedContext(
SemaRef,
const_cast<DeclContext *>(NewDecl->getFriendObjectKind()
? NewDecl->getLexicalDeclContext()
: OldDecl->getLexicalDeclContext()));
NestedNameSpecifierLoc NewQualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(OldDecl->getQualifierLoc(),
TemplateArgs);
if (!NewQualifierLoc)
return true;
NewDecl->setQualifierInfo(NewQualifierLoc);
return false;
}
bool TemplateDeclInstantiator::SubstQualifier(const DeclaratorDecl *OldDecl,
DeclaratorDecl *NewDecl) {
return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs);
}
bool TemplateDeclInstantiator::SubstQualifier(const TagDecl *OldDecl,
TagDecl *NewDecl) {
return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs);
}
// Include attribute instantiation code.
#include "clang/Sema/AttrTemplateInstantiate.inc"
static void instantiateDependentAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignedAttr *Aligned, Decl *New, bool IsPackExpansion) {
if (Aligned->isAlignmentExpr()) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Aligned->getAlignmentExpr(), TemplateArgs);
if (!Result.isInvalid())
S.AddAlignedAttr(New, *Aligned, Result.getAs<Expr>(), IsPackExpansion);
} else {
TypeSourceInfo *Result = S.SubstType(Aligned->getAlignmentType(),
TemplateArgs, Aligned->getLocation(),
DeclarationName());
if (Result)
S.AddAlignedAttr(New, *Aligned, Result, IsPackExpansion);
}
}
static void instantiateDependentAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignedAttr *Aligned, Decl *New) {
if (!Aligned->isPackExpansion()) {
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false);
return;
}
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
if (Aligned->isAlignmentExpr())
S.collectUnexpandedParameterPacks(Aligned->getAlignmentExpr(),
Unexpanded);
else
S.collectUnexpandedParameterPacks(Aligned->getAlignmentType()->getTypeLoc(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether we can expand this attribute pack yet.
bool Expand = true, RetainExpansion = false;
Optional<unsigned> NumExpansions;
// FIXME: Use the actual location of the ellipsis.
SourceLocation EllipsisLoc = Aligned->getLocation();
if (S.CheckParameterPacksForExpansion(EllipsisLoc, Aligned->getRange(),
Unexpanded, TemplateArgs, Expand,
RetainExpansion, NumExpansions))
return;
if (!Expand) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, -1);
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, true);
} else {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, I);
instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false);
}
}
}
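// Illustrative sketch (not part of this change): the kind of dependent
// 'alignas' use the two overloads above service; the type names below are
// hypothetical user code.
//
//   template <unsigned N> struct Padded {
//     alignas(N) char buf[N];                     // dependent alignment expr
//   };
//   template <typename... Ts> struct Overaligned {
//     alignas(Ts...) unsigned char storage[128];  // alignment pack expansion
//   };
//   Padded<16> p;                 // substitution yields alignas(16)
//   Overaligned<int, double> o;   // expands to alignas(int) alignas(double)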
static void instantiateDependentAssumeAlignedAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AssumeAlignedAttr *Aligned, Decl *New) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Expr *E, *OE = nullptr;
ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs);
if (Result.isInvalid())
return;
E = Result.getAs<Expr>();
if (Aligned->getOffset()) {
Result = S.SubstExpr(Aligned->getOffset(), TemplateArgs);
if (Result.isInvalid())
return;
OE = Result.getAs<Expr>();
}
S.AddAssumeAlignedAttr(New, *Aligned, E, OE);
}
static void instantiateDependentAlignValueAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AlignValueAttr *Aligned, Decl *New) {
// The alignment expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs);
if (!Result.isInvalid())
S.AddAlignValueAttr(New, *Aligned, Result.getAs<Expr>());
}
static void instantiateDependentAllocAlignAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AllocAlignAttr *Align, Decl *New) {
Expr *Param = IntegerLiteral::Create(
S.getASTContext(),
llvm::APInt(64, Align->getParamIndex().getSourceIndex()),
S.getASTContext().UnsignedLongLongTy, Align->getLocation());
S.AddAllocAlignAttr(New, *Align, Param);
}
static void instantiateDependentAnnotationAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AnnotateAttr *Attr, Decl *New) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
SmallVector<Expr *, 4> Args;
Args.reserve(Attr->args_size());
for (auto *E : Attr->args()) {
ExprResult Result = S.SubstExpr(E, TemplateArgs);
if (!Result.isUsable())
return;
Args.push_back(Result.get());
}
S.AddAnnotationAttr(New, *Attr, Attr->getAnnotation(), Args);
}
static Expr *instantiateDependentFunctionAttrCondition(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const Attr *A, Expr *OldCond, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = nullptr;
{
Sema::ContextRAII SwitchContext(S, New);
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(OldCond, TemplateArgs);
if (Result.isInvalid())
return nullptr;
Cond = Result.getAs<Expr>();
}
if (!Cond->isTypeDependent()) {
ExprResult Converted = S.PerformContextuallyConvertToBool(Cond);
if (Converted.isInvalid())
return nullptr;
Cond = Converted.get();
}
SmallVector<PartialDiagnosticAt, 8> Diags;
if (OldCond->isValueDependent() && !Cond->isValueDependent() &&
!Expr::isPotentialConstantExprUnevaluated(Cond, New, Diags)) {
S.Diag(A->getLocation(), diag::err_attr_cond_never_constant_expr) << A;
for (const auto &P : Diags)
S.Diag(P.first, P.second);
return nullptr;
}
return Cond;
}
static void instantiateDependentEnableIfAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const EnableIfAttr *EIA, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = instantiateDependentFunctionAttrCondition(
S, TemplateArgs, EIA, EIA->getCond(), Tmpl, New);
if (Cond)
New->addAttr(new (S.getASTContext()) EnableIfAttr(S.getASTContext(), *EIA,
Cond, EIA->getMessage()));
}
static void instantiateDependentDiagnoseIfAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const DiagnoseIfAttr *DIA, const Decl *Tmpl, FunctionDecl *New) {
Expr *Cond = instantiateDependentFunctionAttrCondition(
S, TemplateArgs, DIA, DIA->getCond(), Tmpl, New);
if (Cond)
New->addAttr(new (S.getASTContext()) DiagnoseIfAttr(
S.getASTContext(), *DIA, Cond, DIA->getMessage(),
DIA->getDiagnosticType(), DIA->getArgDependent(), New));
}
// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using
// template A as the base and arguments from TemplateArgs.
static void instantiateDependentCUDALaunchBoundsAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const CUDALaunchBoundsAttr &Attr, Decl *New) {
// The launch-bounds expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MaxThreads = Result.getAs<Expr>();
Expr *MinBlocks = nullptr;
if (Attr.getMinBlocks()) {
Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs);
if (Result.isInvalid())
return;
MinBlocks = Result.getAs<Expr>();
}
S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks);
}
static void
instantiateDependentModeAttr(Sema &S,
const MultiLevelTemplateArgumentList &TemplateArgs,
const ModeAttr &Attr, Decl *New) {
S.AddModeAttr(New, Attr, Attr.getMode(),
/*InInstantiation=*/true);
}
/// Instantiation of 'declare simd' attribute and its arguments.
static void instantiateOMPDeclareSimdDeclAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const OMPDeclareSimdDeclAttr &Attr, Decl *New) {
// Allow 'this' in clauses with varlists.
if (auto *FTD = dyn_cast<FunctionTemplateDecl>(New))
New = FTD->getTemplatedDecl();
auto *FD = cast<FunctionDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(FD->getDeclContext());
SmallVector<Expr *, 4> Uniforms, Aligneds, Alignments, Linears, Steps;
SmallVector<unsigned, 4> LinModifiers;
auto SubstExpr = [&](Expr *E) -> ExprResult {
if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
Sema::ContextRAII SavedContext(S, FD);
LocalInstantiationScope Local(S);
if (FD->getNumParams() > PVD->getFunctionScopeIndex())
Local.InstantiatedLocal(
PVD, FD->getParamDecl(PVD->getFunctionScopeIndex()));
return S.SubstExpr(E, TemplateArgs);
}
Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(),
FD->isCXXInstanceMember());
return S.SubstExpr(E, TemplateArgs);
};
// Substitute a single OpenMP clause, which is a potentially-evaluated
// full-expression.
auto Subst = [&](Expr *E) -> ExprResult {
EnterExpressionEvaluationContext Evaluated(
S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
ExprResult Res = SubstExpr(E);
if (Res.isInvalid())
return Res;
return S.ActOnFinishFullExpr(Res.get(), false);
};
ExprResult Simdlen;
if (auto *E = Attr.getSimdlen())
Simdlen = Subst(E);
if (Attr.uniforms_size() > 0) {
for(auto *E : Attr.uniforms()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Uniforms.push_back(Inst.get());
}
}
auto AI = Attr.alignments_begin();
for (auto *E : Attr.aligneds()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Aligneds.push_back(Inst.get());
Inst = ExprEmpty();
if (*AI)
Inst = S.SubstExpr(*AI, TemplateArgs);
Alignments.push_back(Inst.get());
++AI;
}
auto SI = Attr.steps_begin();
for (auto *E : Attr.linears()) {
ExprResult Inst = Subst(E);
if (Inst.isInvalid())
continue;
Linears.push_back(Inst.get());
Inst = ExprEmpty();
if (*SI)
Inst = S.SubstExpr(*SI, TemplateArgs);
Steps.push_back(Inst.get());
++SI;
}
LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end());
(void)S.ActOnOpenMPDeclareSimdDirective(
S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(),
Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps,
Attr.getRange());
}
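// Illustrative sketch (not part of this change): a 'declare simd' whose
// clauses reference the function's parameters and a dependent constant, so
// they must be re-substituted per instantiation; names are hypothetical.
//
//   template <typename T> struct Vec {
//   #pragma omp declare simd uniform(stride) linear(i : 1) aligned(p : sizeof(T))
//     void gather(const T *p, int i, int stride);
//   };
//   template struct Vec<double>;   // clause expressions are remapped to the
//                                  // instantiated parameters by SubstExpr above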
/// Instantiation of 'declare variant' attribute and its arguments.
static void instantiateOMPDeclareVariantAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const OMPDeclareVariantAttr &Attr, Decl *New) {
// Allow 'this' in clauses with varlists.
if (auto *FTD = dyn_cast<FunctionTemplateDecl>(New))
New = FTD->getTemplatedDecl();
auto *FD = cast<FunctionDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(FD->getDeclContext());
auto &&SubstExpr = [FD, ThisContext, &S, &TemplateArgs](Expr *E) {
if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
Sema::ContextRAII SavedContext(S, FD);
LocalInstantiationScope Local(S);
if (FD->getNumParams() > PVD->getFunctionScopeIndex())
Local.InstantiatedLocal(
PVD, FD->getParamDecl(PVD->getFunctionScopeIndex()));
return S.SubstExpr(E, TemplateArgs);
}
Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(),
FD->isCXXInstanceMember());
return S.SubstExpr(E, TemplateArgs);
};
// Substitute a single OpenMP clause, which is a potentially-evaluated
// full-expression.
auto &&Subst = [&SubstExpr, &S](Expr *E) {
EnterExpressionEvaluationContext Evaluated(
S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
ExprResult Res = SubstExpr(E);
if (Res.isInvalid())
return Res;
return S.ActOnFinishFullExpr(Res.get(), false);
};
ExprResult VariantFuncRef;
if (Expr *E = Attr.getVariantFuncRef()) {
// Do not mark the function as used, to prevent its emission if this is the
// only place where it is used.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
VariantFuncRef = Subst(E);
}
// Copy the template version of the OMPTraitInfo and run substitution on all
// score and condition expressions.
OMPTraitInfo &TI = S.getASTContext().getNewOMPTraitInfo();
TI = *Attr.getTraitInfos();
// Try to substitute template parameters in score and condition expressions.
auto SubstScoreOrConditionExpr = [&S, Subst](Expr *&E, bool) {
if (E) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult ER = Subst(E);
if (ER.isUsable())
E = ER.get();
else
return true;
}
return false;
};
if (TI.anyScoreOrCondition(SubstScoreOrConditionExpr))
return;
Expr *E = VariantFuncRef.get();
// Check function/variant ref for `omp declare variant` but not for `omp
// begin declare variant` (which uses implicit attributes).
Optional<std::pair<FunctionDecl *, Expr *>> DeclVarData =
S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New),
VariantFuncRef.get(), TI,
Attr.getRange());
if (!DeclVarData)
return;
E = DeclVarData.getValue().second;
FD = DeclVarData.getValue().first;
if (auto *VariantDRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
if (auto *VariantFD = dyn_cast<FunctionDecl>(VariantDRE->getDecl())) {
if (auto *VariantFTD = VariantFD->getDescribedFunctionTemplate()) {
if (!VariantFTD->isThisDeclarationADefinition())
return;
Sema::TentativeAnalysisScope Trap(S);
const TemplateArgumentList *TAL = TemplateArgumentList::CreateCopy(
S.Context, TemplateArgs.getInnermost());
auto *SubstFD = S.InstantiateFunctionDeclaration(VariantFTD, TAL,
New->getLocation());
if (!SubstFD)
return;
QualType NewType = S.Context.mergeFunctionTypes(
SubstFD->getType(), FD->getType(),
/* OfBlockPointer */ false,
/* Unqualified */ false, /* AllowCXX */ true);
if (NewType.isNull())
return;
S.InstantiateFunctionDefinition(
New->getLocation(), SubstFD, /* Recursive */ true,
/* DefinitionRequired */ false, /* AtEndOfTU */ false);
SubstFD->setInstantiationIsPending(!SubstFD->isDefined());
E = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(),
SourceLocation(), SubstFD,
/* RefersToEnclosingVariableOrCapture */ false,
/* NameLoc */ SubstFD->getLocation(),
SubstFD->getType(), ExprValueKind::VK_PRValue);
}
}
}
S.ActOnOpenMPDeclareVariantDirective(FD, E, TI, Attr.getRange());
}
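// Illustrative sketch (not part of this change): a 'declare variant' attached
// to a member of a class template. The placement and selector are illustrative
// only; the names are hypothetical.
//
//   template <typename T> struct Ops {
//     static T fast(T x);
//   #pragma omp declare variant(fast) match(device = {kind(gpu)})
//     static T generic(T x);
//   };
//   template struct Ops<float>;   // the variant function reference and any
//                                 // score/condition expressions in the trait
//                                 // info are substituted by the code above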
static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) {
// Both the min and max expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MinExpr = Result.getAs<Expr>();
Result = S.SubstExpr(Attr.getMax(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MaxExpr = Result.getAs<Expr>();
S.addAMDGPUFlatWorkGroupSizeAttr(New, Attr, MinExpr, MaxExpr);
}
static ExplicitSpecifier
instantiateExplicitSpecifier(Sema &S,
const MultiLevelTemplateArgumentList &TemplateArgs,
ExplicitSpecifier ES, FunctionDecl *New) {
if (!ES.getExpr())
return ES;
Expr *OldCond = ES.getExpr();
Expr *Cond = nullptr;
{
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult SubstResult = S.SubstExpr(OldCond, TemplateArgs);
if (SubstResult.isInvalid()) {
return ExplicitSpecifier::Invalid();
}
Cond = SubstResult.get();
}
ExplicitSpecifier Result(Cond, ES.getKind());
if (!Cond->isTypeDependent())
S.tryResolveExplicitSpecifier(Result);
return Result;
}
static void instantiateDependentAMDGPUWavesPerEUAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const AMDGPUWavesPerEUAttr &Attr, Decl *New) {
// Both the min and max expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs);
if (Result.isInvalid())
return;
Expr *MinExpr = Result.getAs<Expr>();
Expr *MaxExpr = nullptr;
if (auto Max = Attr.getMax()) {
Result = S.SubstExpr(Max, TemplateArgs);
if (Result.isInvalid())
return;
MaxExpr = Result.getAs<Expr>();
}
S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr);
}
// This doesn't take any template parameters, but we have a custom action that
// needs to happen when the kernel itself is instantiated. We need to run the
// ItaniumMangler to mark the names required to name this kernel.
static void instantiateDependentSYCLKernelAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const SYCLKernelAttr &Attr, Decl *New) {
// Functions cannot be partially specialized, so if we are being instantiated,
// we are obviously a complete specialization. Since this attribute is only
// valid on function template declarations, we know that this is a full
// instantiation of a kernel.
S.AddSYCLKernelLambda(cast<FunctionDecl>(New));
// Evaluate whether this would change any of the already evaluated
// __builtin_sycl_unique_stable_name values.
for (auto &Itr : S.Context.SYCLUniqueStableNameEvaluatedValues) {
const std::string &CurName = Itr.first->ComputeName(S.Context);
if (Itr.second != CurName) {
S.Diag(New->getLocation(),
diag::err_kernel_invalidates_sycl_unique_stable_name);
S.Diag(Itr.first->getLocation(),
diag::note_sycl_unique_stable_name_evaluated_here);
// Update this so future diagnostics work correctly.
Itr.second = CurName;
}
}
New->addAttr(Attr.clone(S.getASTContext()));
}
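// Illustrative sketch (not part of this change) of a SYCL-style kernel entry
// point; the names are hypothetical:
//
//   template <typename KernelName, typename Func>
//   __attribute__((sycl_kernel)) void kernel_single_task(const Func &f) {
//     f();
//   }
//
// Each instantiation of kernel_single_task<...> reaches the code above, which
// runs the mangler so the kernel name is available and re-checks any cached
// __builtin_sycl_unique_stable_name results against it.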
/// Determine whether the attribute A might be relevant to the declaration D.
/// If not, we can skip instantiating it. The attribute may or may not have
/// been instantiated yet.
static bool isRelevantAttr(Sema &S, const Decl *D, const Attr *A) {
// 'preferred_name' is only relevant to the matching specialization of the
// template.
if (const auto *PNA = dyn_cast<PreferredNameAttr>(A)) {
QualType T = PNA->getTypedefType();
const auto *RD = cast<CXXRecordDecl>(D);
if (!T->isDependentType() && !RD->isDependentContext() &&
!declaresSameEntity(T->getAsCXXRecordDecl(), RD))
return false;
for (const auto *ExistingPNA : D->specific_attrs<PreferredNameAttr>())
if (S.Context.hasSameType(ExistingPNA->getTypedefType(),
PNA->getTypedefType()))
return false;
return true;
}
return true;
}
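// Illustrative sketch (not part of this change) of why the filtering above is
// needed, following the usual libc++-style pattern with hypothetical names:
//
//   template <class CharT> class basic_string;
//   using string  = basic_string<char>;
//   using wstring = basic_string<wchar_t>;
//   template <class CharT>
//   class __attribute__((preferred_name(string),
//                        preferred_name(wstring))) basic_string { /*...*/ };
//
// When basic_string<char> is instantiated, only preferred_name(string) matches
// that specialization; the wstring attribute is considered irrelevant here.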
void Sema::InstantiateAttrsForDecl(
const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl,
Decl *New, LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *OuterMostScope) {
if (NamedDecl *ND = dyn_cast<NamedDecl>(New)) {
// FIXME: This function is called multiple times for the same template
// specialization. We should only instantiate attributes that were added
// since the previous instantiation.
for (const auto *TmplAttr : Tmpl->attrs()) {
if (!isRelevantAttr(*this, New, TmplAttr))
continue;
// FIXME: If any of the special case versions from InstantiateAttrs become
// applicable to template declaration, we'll need to add them here.
CXXThisScopeRAII ThisScope(
*this, dyn_cast_or_null<CXXRecordDecl>(ND->getDeclContext()),
Qualifiers(), ND->isCXXInstanceMember());
Attr *NewAttr = sema::instantiateTemplateAttributeForDecl(
TmplAttr, Context, *this, TemplateArgs);
if (NewAttr && isRelevantAttr(*this, New, NewAttr))
New->addAttr(NewAttr);
}
}
}
static Sema::RetainOwnershipKind
attrToRetainOwnershipKind(const Attr *A) {
switch (A->getKind()) {
case clang::attr::CFConsumed:
return Sema::RetainOwnershipKind::CF;
case clang::attr::OSConsumed:
return Sema::RetainOwnershipKind::OS;
case clang::attr::NSConsumed:
return Sema::RetainOwnershipKind::NS;
default:
llvm_unreachable("Wrong argument supplied");
}
}
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
const Decl *Tmpl, Decl *New,
LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *OuterMostScope) {
for (const auto *TmplAttr : Tmpl->attrs()) {
if (!isRelevantAttr(*this, New, TmplAttr))
continue;
// FIXME: This should be generalized to more than just the AlignedAttr.
const AlignedAttr *Aligned = dyn_cast<AlignedAttr>(TmplAttr);
if (Aligned && Aligned->isAlignmentDependent()) {
instantiateDependentAlignedAttr(*this, TemplateArgs, Aligned, New);
continue;
}
if (const auto *AssumeAligned = dyn_cast<AssumeAlignedAttr>(TmplAttr)) {
instantiateDependentAssumeAlignedAttr(*this, TemplateArgs, AssumeAligned, New);
continue;
}
if (const auto *AlignValue = dyn_cast<AlignValueAttr>(TmplAttr)) {
instantiateDependentAlignValueAttr(*this, TemplateArgs, AlignValue, New);
continue;
}
if (const auto *AllocAlign = dyn_cast<AllocAlignAttr>(TmplAttr)) {
instantiateDependentAllocAlignAttr(*this, TemplateArgs, AllocAlign, New);
continue;
}
if (const auto *Annotate = dyn_cast<AnnotateAttr>(TmplAttr)) {
instantiateDependentAnnotationAttr(*this, TemplateArgs, Annotate, New);
continue;
}
if (const auto *EnableIf = dyn_cast<EnableIfAttr>(TmplAttr)) {
instantiateDependentEnableIfAttr(*this, TemplateArgs, EnableIf, Tmpl,
cast<FunctionDecl>(New));
continue;
}
if (const auto *DiagnoseIf = dyn_cast<DiagnoseIfAttr>(TmplAttr)) {
instantiateDependentDiagnoseIfAttr(*this, TemplateArgs, DiagnoseIf, Tmpl,
cast<FunctionDecl>(New));
continue;
}
if (const auto *CUDALaunchBounds =
dyn_cast<CUDALaunchBoundsAttr>(TmplAttr)) {
instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
*CUDALaunchBounds, New);
continue;
}
if (const auto *Mode = dyn_cast<ModeAttr>(TmplAttr)) {
instantiateDependentModeAttr(*this, TemplateArgs, *Mode, New);
continue;
}
if (const auto *OMPAttr = dyn_cast<OMPDeclareSimdDeclAttr>(TmplAttr)) {
instantiateOMPDeclareSimdDeclAttr(*this, TemplateArgs, *OMPAttr, New);
continue;
}
if (const auto *OMPAttr = dyn_cast<OMPDeclareVariantAttr>(TmplAttr)) {
instantiateOMPDeclareVariantAttr(*this, TemplateArgs, *OMPAttr, New);
continue;
}
if (const auto *AMDGPUFlatWorkGroupSize =
dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(TmplAttr)) {
instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
*this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New);
}
if (const auto *AMDGPUFlatWorkGroupSize =
dyn_cast<AMDGPUWavesPerEUAttr>(TmplAttr)) {
instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs,
*AMDGPUFlatWorkGroupSize, New);
}
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
if (New->hasAttr<DLLExportAttr>() || New->hasAttr<DLLImportAttr>()) {
continue;
}
}
if (const auto *ABIAttr = dyn_cast<ParameterABIAttr>(TmplAttr)) {
AddParameterABIAttr(New, *ABIAttr, ABIAttr->getABI());
continue;
}
if (isa<NSConsumedAttr>(TmplAttr) || isa<OSConsumedAttr>(TmplAttr) ||
isa<CFConsumedAttr>(TmplAttr)) {
AddXConsumedAttr(New, *TmplAttr, attrToRetainOwnershipKind(TmplAttr),
/*template instantiation=*/true);
continue;
}
if (auto *A = dyn_cast<PointerAttr>(TmplAttr)) {
if (!New->hasAttr<PointerAttr>())
New->addAttr(A->clone(Context));
continue;
}
if (auto *A = dyn_cast<OwnerAttr>(TmplAttr)) {
if (!New->hasAttr<OwnerAttr>())
New->addAttr(A->clone(Context));
continue;
}
if (auto *A = dyn_cast<SYCLKernelAttr>(TmplAttr)) {
instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New);
continue;
}
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
// enclosing class has been instantiated. See Sema::InstantiateClass.
LocalInstantiationScope *Saved = nullptr;
if (CurrentInstantiationScope)
Saved = CurrentInstantiationScope->cloneScopes(OuterMostScope);
LateAttrs->push_back(LateInstantiatedAttribute(TmplAttr, Saved, New));
} else {
// Allow 'this' within late-parsed attributes.
auto *ND = cast<NamedDecl>(New);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(ND->getDeclContext());
CXXThisScopeRAII ThisScope(*this, ThisContext, Qualifiers(),
ND->isCXXInstanceMember());
Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context,
*this, TemplateArgs);
if (NewAttr && isRelevantAttr(*this, New, TmplAttr))
New->addAttr(NewAttr);
}
}
}
/// In the MS ABI, we need to instantiate default arguments of dllexported
/// default constructors along with the constructor definition. This allows IR
/// gen to emit a constructor closure which calls the default constructor with
/// its default arguments.
void Sema::InstantiateDefaultCtorDefaultArgs(CXXConstructorDecl *Ctor) {
assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
Ctor->isDefaultConstructor());
unsigned NumParams = Ctor->getNumParams();
if (NumParams == 0)
return;
DLLExportAttr *Attr = Ctor->getAttr<DLLExportAttr>();
if (!Attr)
return;
for (unsigned I = 0; I != NumParams; ++I) {
(void)CheckCXXDefaultArgExpr(Attr->getLocation(), Ctor,
Ctor->getParamDecl(I));
DiscardCleanupsInEvaluationContext();
}
}
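// Illustrative sketch (not part of this change), assuming the MS ABI; the
// type name is hypothetical:
//
//   struct __declspec(dllexport) Widget {
//     Widget(int n = 42);   // default constructor for ABI purposes, since it
//   };                      // is callable with no arguments
//
// Exporting Widget makes IR gen emit a constructor closure that calls the
// constructor with its default argument, so that argument is checked here.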
/// Get the previous declaration of a declaration for the purposes of template
/// instantiation. If this finds a previous declaration, then the previous
/// declaration of the instantiation of D should be an instantiation of the
/// result of this function.
template<typename DeclT>
static DeclT *getPreviousDeclForInstantiation(DeclT *D) {
DeclT *Result = D->getPreviousDecl();
// If the declaration is within a class, and the previous declaration was
// merged from a different definition of that class, then we don't have a
// previous declaration for the purpose of template instantiation.
if (Result && isa<CXXRecordDecl>(D->getDeclContext()) &&
D->getLexicalDeclContext() != Result->getLexicalDeclContext())
return nullptr;
return Result;
}
Decl *
TemplateDeclInstantiator::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
llvm_unreachable("Translation units cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
llvm_unreachable("pragma comment cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitPragmaDetectMismatchDecl(
PragmaDetectMismatchDecl *D) {
llvm_unreachable("pragma comment cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitExternCContextDecl(ExternCContextDecl *D) {
llvm_unreachable("extern \"C\" context cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitMSGuidDecl(MSGuidDecl *D) {
llvm_unreachable("GUID declaration cannot be instantiated");
}
Decl *TemplateDeclInstantiator::VisitTemplateParamObjectDecl(
TemplateParamObjectDecl *D) {
llvm_unreachable("template parameter objects cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitLabelDecl(LabelDecl *D) {
LabelDecl *Inst = LabelDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getIdentifier());
Owner->addDecl(Inst);
return Inst;
}
Decl *
TemplateDeclInstantiator::VisitNamespaceDecl(NamespaceDecl *D) {
llvm_unreachable("Namespaces cannot be instantiated");
}
Decl *
TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
NamespaceAliasDecl *Inst
= NamespaceAliasDecl::Create(SemaRef.Context, Owner,
D->getNamespaceLoc(),
D->getAliasLoc(),
D->getIdentifier(),
D->getQualifierLoc(),
D->getTargetNameLoc(),
D->getNamespace());
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D,
bool IsTypeAlias) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isInstantiationDependentType() ||
DI->getType()->isVariablyModifiedType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
Invalid = true;
DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy);
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
// HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong.
// libstdc++ relies upon this bug in its implementation of common_type. If we
// happen to be processing that implementation, fake up the g++ ?:
// semantics. See LWG issue 2141 for more information on the bug. The bugs
// are fixed in g++ and libstdc++ 4.9.0 (2014-04-22).
const DecltypeType *DT = DI->getType()->getAs<DecltypeType>();
CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D->getDeclContext());
if (DT && RD && isa<ConditionalOperator>(DT->getUnderlyingExpr()) &&
DT->isReferenceType() &&
RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() &&
RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") &&
D->getIdentifier() && D->getIdentifier()->isStr("type") &&
SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc()))
// Fold it to the (non-reference) type which g++ would have produced.
DI = SemaRef.Context.getTrivialTypeSourceInfo(
DI->getType().getNonReferenceType());
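// Illustrative sketch (not part of this change) of the libstdc++ pattern the
// workaround above targets (simplified):
//
//   template <class T, class U> struct common_type<T, U> {
//     typedef decltype(true ? std::declval<T>() : std::declval<U>()) type;
//   };
//
// Old g++ treated the ?: result as a prvalue, so 'type' came out non-reference;
// folding away the reference reproduces that result when instantiating the
// libstdc++ <type_traits> header as a system header.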
// Create the new typedef
TypedefNameDecl *Typedef;
if (IsTypeAlias)
Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), DI);
else
Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), DI);
if (Invalid)
Typedef->setInvalidDecl();
// If the old typedef was the name for linkage purposes of an anonymous
// tag decl, re-establish that relationship for the new typedef.
if (const TagType *oldTagType = D->getUnderlyingType()->getAs<TagType>()) {
TagDecl *oldTag = oldTagType->getDecl();
if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) {
TagDecl *newTag = DI->getType()->castAs<TagType>()->getDecl();
assert(!newTag->hasNameForLinkage());
newTag->setTypedefNameForAnonDecl(Typedef);
}
}
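// Illustrative sketch (not part of this change): a typedef that is the name
// for linkage purposes of an unnamed struct, with hypothetical names:
//
//   template <typename T> struct Outer {
//     typedef struct { T value; } Item;   // the unnamed struct uses 'Item' as
//   };                                    // its name for linkage purposes
//   Outer<int>::Item i;                   // the instantiated unnamed struct is
//                                         // re-linked to the new 'Item' above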
if (TypedefNameDecl *Prev = getPreviousDeclForInstantiation(D)) {
NamedDecl *InstPrev = SemaRef.FindInstantiatedDecl(D->getLocation(), Prev,
TemplateArgs);
if (!InstPrev)
return nullptr;
TypedefNameDecl *InstPrevTypedef = cast<TypedefNameDecl>(InstPrev);
// If the typedef types are not identical, reject them.
SemaRef.isIncompatibleTypedef(InstPrevTypedef, Typedef);
Typedef->setPreviousDecl(InstPrevTypedef);
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Typedef);
if (D->getUnderlyingType()->getAs<DependentNameType>())
SemaRef.inferGslPointerAttribute(Typedef);
Typedef->setAccess(D->getAccess());
return Typedef;
}
Decl *TemplateDeclInstantiator::VisitTypedefDecl(TypedefDecl *D) {
Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/false);
if (Typedef)
Owner->addDecl(Typedef);
return Typedef;
}
Decl *TemplateDeclInstantiator::VisitTypeAliasDecl(TypeAliasDecl *D) {
Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/true);
if (Typedef)
Owner->addDecl(Typedef);
return Typedef;
}
Decl *
TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
// Create a local instantiation scope for this type alias template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
TypeAliasDecl *Pattern = D->getTemplatedDecl();
TypeAliasTemplateDecl *PrevAliasTemplate = nullptr;
if (getPreviousDeclForInstantiation<TypedefNameDecl>(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty()) {
PrevAliasTemplate = dyn_cast<TypeAliasTemplateDecl>(Found.front());
}
}
TypeAliasDecl *AliasInst = cast_or_null<TypeAliasDecl>(
InstantiateTypedefNameDecl(Pattern, /*IsTypeAlias=*/true));
if (!AliasInst)
return nullptr;
TypeAliasTemplateDecl *Inst
= TypeAliasTemplateDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getDeclName(), InstParams, AliasInst);
AliasInst->setDescribedAliasTemplate(Inst);
if (PrevAliasTemplate)
Inst->setPreviousDecl(PrevAliasTemplate);
Inst->setAccess(D->getAccess());
if (!PrevAliasTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitBindingDecl(BindingDecl *D) {
auto *NewBD = BindingDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getIdentifier());
NewBD->setReferenced(D->isReferenced());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewBD);
return NewBD;
}
Decl *TemplateDeclInstantiator::VisitDecompositionDecl(DecompositionDecl *D) {
// Transform the bindings first.
SmallVector<BindingDecl*, 16> NewBindings;
for (auto *OldBD : D->bindings())
NewBindings.push_back(cast<BindingDecl>(VisitBindingDecl(OldBD)));
ArrayRef<BindingDecl*> NewBindingArray = NewBindings;
auto *NewDD = cast_or_null<DecompositionDecl>(
VisitVarDecl(D, /*InstantiatingVarTemplate=*/false, &NewBindingArray));
if (!NewDD || NewDD->isInvalidDecl())
for (auto *NewBD : NewBindings)
NewBD->setInvalidDecl();
return NewDD;
}
Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D) {
return VisitVarDecl(D, /*InstantiatingVarTemplate=*/false);
}
Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
bool InstantiatingVarTemplate,
ArrayRef<BindingDecl*> *Bindings) {
// Do substitution on the type of the declaration
TypeSourceInfo *DI = SemaRef.SubstType(
D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(),
D->getDeclName(), /*AllowDeducedTST*/true);
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
<< D->isStaticDataMember() << DI->getType();
return nullptr;
}
DeclContext *DC = Owner;
if (D->isLocalExternDecl())
SemaRef.adjustContextForLocalExternDecl(DC);
// Build the instantiated declaration.
VarDecl *Var;
if (Bindings)
Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
D->getLocation(), DI->getType(), DI,
D->getStorageClass(), *Bindings);
else
Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
D->getLocation(), D->getIdentifier(), DI->getType(),
DI, D->getStorageClass());
// In ARC, infer 'retaining' for variables of retainable type.
if (SemaRef.getLangOpts().ObjCAutoRefCount &&
SemaRef.inferObjCARCLifetime(Var))
Var->setInvalidDecl();
if (SemaRef.getLangOpts().OpenCL)
SemaRef.deduceOpenCLAddressSpace(Var);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Var))
return nullptr;
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, InstantiatingVarTemplate);
- if (D->isNRVOVariable()) {
+ if (D->isNRVOVariable() && !Var->isInvalidDecl()) {
QualType RT;
if (auto *F = dyn_cast<FunctionDecl>(DC))
RT = F->getReturnType();
else if (isa<BlockDecl>(DC))
RT = cast<FunctionType>(SemaRef.getCurBlock()->FunctionType)
->getReturnType();
else
llvm_unreachable("Unknown context type");
// This is the last chance we have of checking copy elision eligibility
// for functions in dependent contexts. The sema actions for building
// the return statement during template instantiation will have no effect
// regarding copy elision, since NRVO propagation runs on the scope exit
// actions, and these are not run on instantiation.
// This might run through some VarDecls which were returned from non-taken
// 'if constexpr' branches, and these will end up being constructed on the
// return slot even if they will never be returned, as a sort of accidental
// 'optimization'. Notably, functions with 'auto' return types won't have their
// return type deduced by this point. Coupled with the limitation described
// previously, this makes it very hard to support copy elision for these.
Sema::NamedReturnInfo Info = SemaRef.getNamedReturnInfo(Var);
bool NRVO = SemaRef.getCopyElisionCandidate(Info, RT) != nullptr;
Var->setNRVOVariable(NRVO);
}
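// Illustrative sketch (not part of this change) of the dependent-context NRVO
// decision made above:
//
//   template <typename T> T make() {
//     T result{};
//     return result;   // whether 'result' is an NRVO candidate depends on T,
//   }                  // so it is re-decided when make<T> is instantiated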
Var->setImplicit(D->isImplicit());
if (Var->isStaticLocal())
SemaRef.CheckStaticLocalForDllExport(Var);
return Var;
}
Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) {
AccessSpecDecl* AD
= AccessSpecDecl::Create(SemaRef.Context, D->getAccess(), Owner,
D->getAccessSpecifierLoc(), D->getColonLoc());
Owner->addHiddenDecl(AD);
return AD;
}
Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isInstantiationDependentType() ||
DI->getType()->isVariablyModifiedType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
DI = D->getTypeSourceInfo();
Invalid = true;
} else if (DI->getType()->isFunctionType()) {
// C++ [temp.arg.type]p3:
// If a declaration acquires a function type through a type
// dependent on a template-parameter and this causes a
// declaration that does not use the syntactic form of a
// function declarator to have function type, the program is
// ill-formed.
SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
<< DI->getType();
Invalid = true;
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
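// Illustrative sketch (not part of this change) of the ill-formed case that
// diagnostic covers, with hypothetical names:
//
//   template <typename T> struct Holder {
//     T member;             // fine for object types...
//   };
//   Holder<int(int)> h;     // ...but T = int(int) gives 'member' a function
//                           // type without function-declarator syntax, which
//                           // [temp.arg.type]p3 makes ill-formed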
Expr *BitWidth = D->getBitWidth();
if (Invalid)
BitWidth = nullptr;
else if (BitWidth) {
// The bit-width expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult InstantiatedBitWidth
= SemaRef.SubstExpr(BitWidth, TemplateArgs);
if (InstantiatedBitWidth.isInvalid()) {
Invalid = true;
BitWidth = nullptr;
} else
BitWidth = InstantiatedBitWidth.getAs<Expr>();
}
FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(),
DI->getType(), DI,
cast<RecordDecl>(Owner),
D->getLocation(),
D->isMutable(),
BitWidth,
D->getInClassInitStyle(),
D->getInnerLocStart(),
D->getAccess(),
nullptr);
if (!Field) {
cast<Decl>(Owner)->setInvalidDecl();
return nullptr;
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Field, LateAttrs, StartingScope);
if (Field->hasAttrs())
SemaRef.CheckAlignasUnderalignment(Field);
if (Invalid)
Field->setInvalidDecl();
if (!Field->getDeclName()) {
// Keep track of where this decl came from.
SemaRef.Context.setInstantiatedFromUnnamedFieldDecl(Field, D);
}
if (CXXRecordDecl *Parent= dyn_cast<CXXRecordDecl>(Field->getDeclContext())) {
if (Parent->isAnonymousStructOrUnion() &&
Parent->getRedeclContext()->isFunctionOrMethod())
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Field);
}
Field->setImplicit(D->isImplicit());
Field->setAccess(D->getAccess());
Owner->addDecl(Field);
return Field;
}
Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) {
bool Invalid = false;
TypeSourceInfo *DI = D->getTypeSourceInfo();
if (DI->getType()->isVariablyModifiedType()) {
SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified)
<< D;
Invalid = true;
} else if (DI->getType()->isInstantiationDependentType()) {
DI = SemaRef.SubstType(DI, TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI) {
DI = D->getTypeSourceInfo();
Invalid = true;
} else if (DI->getType()->isFunctionType()) {
// C++ [temp.arg.type]p3:
// If a declaration acquires a function type through a type
// dependent on a template-parameter and this causes a
// declaration that does not use the syntactic form of a
// function declarator to have function type, the program is
// ill-formed.
SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function)
<< DI->getType();
Invalid = true;
}
} else {
SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType());
}
MSPropertyDecl *Property = MSPropertyDecl::Create(
SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(),
DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId());
SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs,
StartingScope);
if (Invalid)
Property->setInvalidDecl();
Property->setAccess(D->getAccess());
Owner->addDecl(Property);
return Property;
}
Decl *TemplateDeclInstantiator::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
NamedDecl **NamedChain =
new (SemaRef.Context)NamedDecl*[D->getChainingSize()];
int i = 0;
for (auto *PI : D->chain()) {
NamedDecl *Next = SemaRef.FindInstantiatedDecl(D->getLocation(), PI,
TemplateArgs);
if (!Next)
return nullptr;
NamedChain[i++] = Next;
}
QualType T = cast<FieldDecl>(NamedChain[i-1])->getType();
IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create(
SemaRef.Context, Owner, D->getLocation(), D->getIdentifier(), T,
{NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
IndirectField->addAttr(Attr->clone(SemaRef.Context));
IndirectField->setImplicit(D->isImplicit());
IndirectField->setAccess(D->getAccess());
Owner->addDecl(IndirectField);
return IndirectField;
}
Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) {
// Handle friend type expressions by simply substituting template
// parameters into the pattern type and checking the result.
if (TypeSourceInfo *Ty = D->getFriendType()) {
TypeSourceInfo *InstTy;
// If this is an unsupported friend, don't bother substituting template
// arguments into it. The actual type referred to won't be used by any
// parts of Clang, and may not be valid for instantiating. Just use the
// same info for the instantiated friend.
if (D->isUnsupportedFriend()) {
InstTy = Ty;
} else {
InstTy = SemaRef.SubstType(Ty, TemplateArgs,
D->getLocation(), DeclarationName());
}
if (!InstTy)
return nullptr;
FriendDecl *FD = SemaRef.CheckFriendTypeDecl(D->getBeginLoc(),
D->getFriendLoc(), InstTy);
if (!FD)
return nullptr;
FD->setAccess(AS_public);
FD->setUnsupportedFriend(D->isUnsupportedFriend());
Owner->addDecl(FD);
return FD;
}
NamedDecl *ND = D->getFriendDecl();
assert(ND && "friend decl must be a decl or a type!");
// All of the Visit implementations for the various potential friend
// declarations have to be carefully written to work for friend
// objects, with the most important detail being that the target
// decl should almost certainly not be placed in Owner.
Decl *NewND = Visit(ND);
if (!NewND) return nullptr;
FriendDecl *FD =
FriendDecl::Create(SemaRef.Context, Owner, D->getLocation(),
cast<NamedDecl>(NewND), D->getFriendLoc());
FD->setAccess(AS_public);
FD->setUnsupportedFriend(D->isUnsupportedFriend());
Owner->addDecl(FD);
return FD;
}
Decl *TemplateDeclInstantiator::VisitStaticAssertDecl(StaticAssertDecl *D) {
Expr *AssertExpr = D->getAssertExpr();
// The expression in a static assertion is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult InstantiatedAssertExpr
= SemaRef.SubstExpr(AssertExpr, TemplateArgs);
if (InstantiatedAssertExpr.isInvalid())
return nullptr;
return SemaRef.BuildStaticAssertDeclaration(D->getLocation(),
InstantiatedAssertExpr.get(),
D->getMessage(),
D->getRParenLoc(),
D->isFailed());
}
Decl *TemplateDeclInstantiator::VisitEnumDecl(EnumDecl *D) {
EnumDecl *PrevDecl = nullptr;
if (EnumDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(),
PatternPrev,
TemplateArgs);
if (!Prev) return nullptr;
PrevDecl = cast<EnumDecl>(Prev);
}
EnumDecl *Enum =
EnumDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
D->getLocation(), D->getIdentifier(), PrevDecl,
D->isScoped(), D->isScopedUsingClassTag(), D->isFixed());
if (D->isFixed()) {
if (TypeSourceInfo *TI = D->getIntegerTypeSourceInfo()) {
// If we have type source information for the underlying type, it means it
// has been explicitly set by the user. Perform substitution on it before
// moving on.
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
TypeSourceInfo *NewTI = SemaRef.SubstType(TI, TemplateArgs, UnderlyingLoc,
DeclarationName());
if (!NewTI || SemaRef.CheckEnumUnderlyingType(NewTI))
Enum->setIntegerType(SemaRef.Context.IntTy);
else
Enum->setIntegerTypeSourceInfo(NewTI);
} else {
assert(!D->getIntegerType()->isDependentType()
&& "Dependent type without type source info");
Enum->setIntegerType(D->getIntegerType());
}
}
SemaRef.InstantiateAttrs(TemplateArgs, D, Enum);
Enum->setInstantiationOfMemberEnum(D, TSK_ImplicitInstantiation);
Enum->setAccess(D->getAccess());
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(Enum, SemaRef.Context.getManglingNumber(D));
// See if the old tag was defined along with a declarator.
// If it did, mark the new tag as being associated with that declarator.
if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D))
SemaRef.Context.addDeclaratorForUnnamedTagDecl(Enum, DD);
// See if the old tag was defined along with a typedef.
// If it did, mark the new tag as being associated with that typedef.
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D))
SemaRef.Context.addTypedefNameForUnnamedTagDecl(Enum, TND);
if (SubstQualifier(D, Enum)) return nullptr;
Owner->addDecl(Enum);
EnumDecl *Def = D->getDefinition();
if (Def && Def != D) {
// If this is an out-of-line definition of an enum member template, check
// that the underlying types match in the instantiation of both
// declarations.
if (TypeSourceInfo *TI = Def->getIntegerTypeSourceInfo()) {
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
QualType DefnUnderlying =
SemaRef.SubstType(TI->getType(), TemplateArgs,
UnderlyingLoc, DeclarationName());
SemaRef.CheckEnumRedeclaration(Def->getLocation(), Def->isScoped(),
DefnUnderlying, /*IsFixed=*/true, Enum);
}
}
// C++11 [temp.inst]p1: The implicit instantiation of a class template
// specialization causes the implicit instantiation of the declarations, but
// not the definitions of scoped member enumerations.
//
// DR1484 clarifies that enumeration definitions inside of a template
// declaration aren't considered entities that can be separately instantiated
// from the rest of the entity they are declared inside of.
if (isDeclWithinFunction(D) ? D == Def : Def && !Enum->isScoped()) {
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Enum);
InstantiateEnumDefinition(Enum, Def);
}
return Enum;
}
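// Illustrative sketch (not part of this change): a scoped member enumeration
// is only declared when its enclosing specialization is implicitly
// instantiated; hypothetical names:
//
//   template <typename T> struct Traits {
//     enum class Kind : int { A = sizeof(T), B };   // definition deferred
//   };
//   Traits<long>::Kind k = Traits<long>::Kind::A;   // use forces the definition
//
// An unscoped member enum (or an enum local to a function being instantiated)
// is instead defined eagerly by the InstantiateEnumDefinition call above.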
void TemplateDeclInstantiator::InstantiateEnumDefinition(
EnumDecl *Enum, EnumDecl *Pattern) {
Enum->startDefinition();
// Update the location to refer to the definition.
Enum->setLocation(Pattern->getLocation());
SmallVector<Decl*, 4> Enumerators;
EnumConstantDecl *LastEnumConst = nullptr;
for (auto *EC : Pattern->enumerators()) {
// The specified value for the enumerator.
ExprResult Value((Expr *)nullptr);
if (Expr *UninstValue = EC->getInitExpr()) {
// The enumerator's value expression is a constant expression.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Value = SemaRef.SubstExpr(UninstValue, TemplateArgs);
}
// Drop the initial value and continue.
bool isInvalid = false;
if (Value.isInvalid()) {
Value = nullptr;
isInvalid = true;
}
EnumConstantDecl *EnumConst
= SemaRef.CheckEnumConstant(Enum, LastEnumConst,
EC->getLocation(), EC->getIdentifier(),
Value.get());
if (isInvalid) {
if (EnumConst)
EnumConst->setInvalidDecl();
Enum->setInvalidDecl();
}
if (EnumConst) {
SemaRef.InstantiateAttrs(TemplateArgs, EC, EnumConst);
EnumConst->setAccess(Enum->getAccess());
Enum->addDecl(EnumConst);
Enumerators.push_back(EnumConst);
LastEnumConst = EnumConst;
if (Pattern->getDeclContext()->isFunctionOrMethod() &&
!Enum->isScoped()) {
// If the enumeration is within a function or method, record the enum
// constant as a local.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(EC, EnumConst);
}
}
}
SemaRef.ActOnEnumBody(Enum->getLocation(), Enum->getBraceRange(), Enum,
Enumerators, nullptr, ParsedAttributesView());
}
Decl *TemplateDeclInstantiator::VisitEnumConstantDecl(EnumConstantDecl *D) {
llvm_unreachable("EnumConstantDecls can only occur within EnumDecls.");
}
Decl *
TemplateDeclInstantiator::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) {
llvm_unreachable("BuiltinTemplateDecls cannot be instantiated.");
}
Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) {
bool isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
// Create a local instantiation scope for this class template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
CXXRecordDecl *Pattern = D->getTemplatedDecl();
// Instantiate the qualifier. We have to do this first in case
// we're a friend declaration, because if we are then we need to put
// the new declaration in the appropriate context.
NestedNameSpecifierLoc QualifierLoc = Pattern->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
CXXRecordDecl *PrevDecl = nullptr;
ClassTemplateDecl *PrevClassTemplate = nullptr;
if (!isFriend && getPreviousDeclForInstantiation(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty()) {
PrevClassTemplate = dyn_cast<ClassTemplateDecl>(Found.front());
if (PrevClassTemplate)
PrevDecl = PrevClassTemplate->getTemplatedDecl();
}
}
// If this isn't a friend, then it's a member template, in which
// case we just want to build the instantiation in the
// specialization. If it is a friend, we want to build it in
// the appropriate context.
DeclContext *DC = Owner;
if (isFriend) {
if (QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (!DC) return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(Pattern->getLocation(),
Pattern->getDeclContext(),
TemplateArgs);
}
// Look for a previous declaration of the template in the owning
// context.
LookupResult R(SemaRef, Pattern->getDeclName(), Pattern->getLocation(),
Sema::LookupOrdinaryName,
SemaRef.forRedeclarationInCurContext());
SemaRef.LookupQualifiedName(R, DC);
if (R.isSingleResult()) {
PrevClassTemplate = R.getAsSingle<ClassTemplateDecl>();
if (PrevClassTemplate)
PrevDecl = PrevClassTemplate->getTemplatedDecl();
}
if (!PrevClassTemplate && QualifierLoc) {
SemaRef.Diag(Pattern->getLocation(), diag::err_not_tag_in_scope)
<< D->getTemplatedDecl()->getTagKind() << Pattern->getDeclName() << DC
<< QualifierLoc.getSourceRange();
return nullptr;
}
if (PrevClassTemplate) {
TemplateParameterList *PrevParams
= PrevClassTemplate->getMostRecentDecl()->getTemplateParameters();
// Make sure the parameter lists match.
if (!SemaRef.TemplateParameterListsAreEqual(InstParams, PrevParams, true,
Sema::TPL_TemplateMatch))
return nullptr;
// Do some additional validation, then merge default arguments
// from the existing declarations.
if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams,
Sema::TPC_ClassTemplate))
return nullptr;
}
}
CXXRecordDecl *RecordInst = CXXRecordDecl::Create(
SemaRef.Context, Pattern->getTagKind(), DC, Pattern->getBeginLoc(),
Pattern->getLocation(), Pattern->getIdentifier(), PrevDecl,
/*DelayTypeCreation=*/true);
if (QualifierLoc)
RecordInst->setQualifierInfo(QualifierLoc);
SemaRef.InstantiateAttrsForDecl(TemplateArgs, Pattern, RecordInst, LateAttrs,
StartingScope);
ClassTemplateDecl *Inst
= ClassTemplateDecl::Create(SemaRef.Context, DC, D->getLocation(),
D->getIdentifier(), InstParams, RecordInst);
assert(!(isFriend && Owner->isDependentContext()));
Inst->setPreviousDecl(PrevClassTemplate);
RecordInst->setDescribedClassTemplate(Inst);
if (isFriend) {
if (PrevClassTemplate)
Inst->setAccess(PrevClassTemplate->getAccess());
else
Inst->setAccess(D->getAccess());
Inst->setObjectOfFriendDecl();
// TODO: do we want to track the instantiation progeny of this
// friend target decl?
} else {
Inst->setAccess(D->getAccess());
if (!PrevClassTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
}
// Trigger creation of the type for the instantiation.
SemaRef.Context.getInjectedClassNameType(RecordInst,
Inst->getInjectedClassNameSpecialization());
// Finish handling of friends.
if (isFriend) {
DC->makeDeclVisibleInContext(Inst);
Inst->setLexicalDeclContext(Owner);
RecordInst->setLexicalDeclContext(Owner);
return Inst;
}
if (D->isOutOfLine()) {
Inst->setLexicalDeclContext(D->getLexicalDeclContext());
RecordInst->setLexicalDeclContext(D->getLexicalDeclContext());
}
Owner->addDecl(Inst);
if (!PrevClassTemplate) {
// Queue up any out-of-line partial specializations of this member
// class template; the client will force their instantiation once
// the enclosing class has been instantiated.
SmallVector<ClassTemplatePartialSpecializationDecl *, 4> PartialSpecs;
D->getPartialSpecializations(PartialSpecs);
for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I)
if (PartialSpecs[I]->getFirstDecl()->isOutOfLine())
OutOfLinePartialSpecs.push_back(std::make_pair(Inst, PartialSpecs[I]));
}
return Inst;
}
Decl *
TemplateDeclInstantiator::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate();
// Lookup the already-instantiated declaration in the instantiation
// of the class template and return that.
DeclContext::lookup_result Found
= Owner->lookup(ClassTemplate->getDeclName());
if (Found.empty())
return nullptr;
ClassTemplateDecl *InstClassTemplate
= dyn_cast<ClassTemplateDecl>(Found.front());
if (!InstClassTemplate)
return nullptr;
if (ClassTemplatePartialSpecializationDecl *Result
= InstClassTemplate->findPartialSpecInstantiatedFromMember(D))
return Result;
return InstantiateClassTemplatePartialSpecialization(InstClassTemplate, D);
}
Decl *TemplateDeclInstantiator::VisitVarTemplateDecl(VarTemplateDecl *D) {
assert(D->getTemplatedDecl()->isStaticDataMember() &&
"Only static data member templates are allowed.");
// Create a local instantiation scope for this variable template, which
// will contain the instantiations of the template parameters.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
VarDecl *Pattern = D->getTemplatedDecl();
VarTemplateDecl *PrevVarTemplate = nullptr;
if (getPreviousDeclForInstantiation(Pattern)) {
DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
if (!Found.empty())
PrevVarTemplate = dyn_cast<VarTemplateDecl>(Found.front());
}
VarDecl *VarInst =
cast_or_null<VarDecl>(VisitVarDecl(Pattern,
/*InstantiatingVarTemplate=*/true));
if (!VarInst) return nullptr;
DeclContext *DC = Owner;
VarTemplateDecl *Inst = VarTemplateDecl::Create(
SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams,
VarInst);
VarInst->setDescribedVarTemplate(Inst);
Inst->setPreviousDecl(PrevVarTemplate);
Inst->setAccess(D->getAccess());
if (!PrevVarTemplate)
Inst->setInstantiatedFromMemberTemplate(D);
if (D->isOutOfLine()) {
Inst->setLexicalDeclContext(D->getLexicalDeclContext());
VarInst->setLexicalDeclContext(D->getLexicalDeclContext());
}
Owner->addDecl(Inst);
if (!PrevVarTemplate) {
// Queue up any out-of-line partial specializations of this member
// variable template; the client will force their instantiation once
// the enclosing class has been instantiated.
SmallVector<VarTemplatePartialSpecializationDecl *, 4> PartialSpecs;
D->getPartialSpecializations(PartialSpecs);
for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I)
if (PartialSpecs[I]->getFirstDecl()->isOutOfLine())
OutOfLineVarPartialSpecs.push_back(
std::make_pair(Inst, PartialSpecs[I]));
}
return Inst;
}
Decl *TemplateDeclInstantiator::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
assert(D->isStaticDataMember() &&
"Only static data member templates are allowed.");
VarTemplateDecl *VarTemplate = D->getSpecializedTemplate();
// Lookup the already-instantiated declaration and return that.
DeclContext::lookup_result Found = Owner->lookup(VarTemplate->getDeclName());
assert(!Found.empty() && "Instantiation found nothing?");
VarTemplateDecl *InstVarTemplate = dyn_cast<VarTemplateDecl>(Found.front());
assert(InstVarTemplate && "Instantiation did not find a variable template?");
if (VarTemplatePartialSpecializationDecl *Result =
InstVarTemplate->findPartialSpecInstantiatedFromMember(D))
return Result;
return InstantiateVarTemplatePartialSpecialization(InstVarTemplate, D);
}
Decl *
TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
// Create a local instantiation scope for this function template, which
// will contain the instantiations of the template parameters and then get
// merged with the local instantiation scope for the function template
// itself.
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
FunctionDecl *Instantiated = nullptr;
if (CXXMethodDecl *DMethod = dyn_cast<CXXMethodDecl>(D->getTemplatedDecl()))
Instantiated = cast_or_null<FunctionDecl>(VisitCXXMethodDecl(DMethod,
InstParams));
else
Instantiated = cast_or_null<FunctionDecl>(VisitFunctionDecl(
D->getTemplatedDecl(),
InstParams));
if (!Instantiated)
return nullptr;
// Link the instantiated function template declaration to the function
// template from which it was instantiated.
FunctionTemplateDecl *InstTemplate
= Instantiated->getDescribedFunctionTemplate();
InstTemplate->setAccess(D->getAccess());
assert(InstTemplate &&
"VisitFunctionDecl/CXXMethodDecl didn't create a template!");
bool isFriend = (InstTemplate->getFriendObjectKind() != Decl::FOK_None);
// Link the instantiation back to the pattern *unless* this is a
// non-definition friend declaration.
if (!InstTemplate->getInstantiatedFromMemberTemplate() &&
!(isFriend && !D->getTemplatedDecl()->isThisDeclarationADefinition()))
InstTemplate->setInstantiatedFromMemberTemplate(D);
// Make declarations visible in the appropriate context.
if (!isFriend) {
Owner->addDecl(InstTemplate);
} else if (InstTemplate->getDeclContext()->isRecord() &&
!getPreviousDeclForInstantiation(D)) {
SemaRef.CheckFriendAccess(InstTemplate);
}
return InstTemplate;
}
Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
CXXRecordDecl *PrevDecl = nullptr;
if (D->isInjectedClassName())
PrevDecl = cast<CXXRecordDecl>(Owner);
else if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(),
PatternPrev,
TemplateArgs);
if (!Prev) return nullptr;
PrevDecl = cast<CXXRecordDecl>(Prev);
}
CXXRecordDecl *Record = nullptr;
if (D->isLambda())
Record = CXXRecordDecl::CreateLambda(
SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(),
D->isDependentLambda(), D->isGenericLambda(),
D->getLambdaCaptureDefault());
else
Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner,
D->getBeginLoc(), D->getLocation(),
D->getIdentifier(), PrevDecl);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Record))
return nullptr;
SemaRef.InstantiateAttrsForDecl(TemplateArgs, D, Record, LateAttrs,
StartingScope);
Record->setImplicit(D->isImplicit());
// FIXME: Checking against AS_none is an ugly hack to work around the issue
// that the tag decls introduced by friend class declarations don't have an
// access specifier. Remove once this area of the code gets sorted out.
if (D->getAccess() != AS_none)
Record->setAccess(D->getAccess());
if (!D->isInjectedClassName())
Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
// If the original function was part of a friend declaration,
// inherit its namespace state.
if (D->getFriendObjectKind())
Record->setObjectOfFriendDecl();
// Make sure that anonymous structs and unions are recorded.
if (D->isAnonymousStructOrUnion())
Record->setAnonymousStructOrUnion(true);
if (D->isLocalClass())
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Record);
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(Record,
SemaRef.Context.getManglingNumber(D));
// See if the old tag was defined along with a declarator.
// If so, mark the new tag as being associated with that declarator.
if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D))
SemaRef.Context.addDeclaratorForUnnamedTagDecl(Record, DD);
// See if the old tag was defined along with a typedef.
// If so, mark the new tag as being associated with that typedef.
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D))
SemaRef.Context.addTypedefNameForUnnamedTagDecl(Record, TND);
Owner->addDecl(Record);
// DR1484 clarifies that the members of a local class are instantiated as part
// of the instantiation of their enclosing entity.
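// For example (an illustrative sketch, not taken from the original sources):
//
//   template<typename T> void g() {
//     struct Local { T get() const { return T(); } };
//     Local().get();   // Local and Local::get are instantiated with g<T>
//   }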
if (D->isCompleteDefinition() && D->isLocalClass()) {
Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef);
SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs,
TSK_ImplicitInstantiation,
/*Complain=*/true);
// For nested local classes, we will instantiate the members when we
// reach the end of the outermost (non-nested) local class.
if (!D->isCXXClassMember())
SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs,
TSK_ImplicitInstantiation);
// This class may have local implicit instantiations that need to be
// performed within this scope.
LocalInstantiations.perform();
}
SemaRef.DiagnoseUnusedNestedTypedefs(Record);
return Record;
}
/// Adjust the given function type for an instantiation of the
/// given declaration, to cope with modifications to the function's type that
/// aren't reflected in the type-source information.
///
/// \param D The declaration we're instantiating.
/// \param TInfo The already-instantiated type.
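///
/// In other words (a hedged summary): when only the ExtInfo bits of the
/// prototype (e.g. calling convention, noreturn, regparm) differ between the
/// declaration's type and the substituted type-source information, rebuild
/// the function type from the substituted prototype while preserving the
/// original ExtInfo.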
static QualType adjustFunctionTypeForInstantiation(ASTContext &Context,
FunctionDecl *D,
TypeSourceInfo *TInfo) {
const FunctionProtoType *OrigFunc
= D->getType()->castAs<FunctionProtoType>();
const FunctionProtoType *NewFunc
= TInfo->getType()->castAs<FunctionProtoType>();
if (OrigFunc->getExtInfo() == NewFunc->getExtInfo())
return TInfo->getType();
FunctionProtoType::ExtProtoInfo NewEPI = NewFunc->getExtProtoInfo();
NewEPI.ExtInfo = OrigFunc->getExtInfo();
return Context.getFunctionType(NewFunc->getReturnType(),
NewFunc->getParamTypes(), NewEPI);
}
/// Normal class members are of more specific types and therefore
/// don't make it here. This function serves three purposes:
/// 1) instantiating function templates
/// 2) substituting friend declarations
/// 3) substituting deduction guide declarations for nested class templates
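///
/// For example (an illustrative sketch; the declarations are hypothetical):
/// a deduction guide written for a nested class template reaches this
/// function via case (3) when the enclosing class template is instantiated:
///
///   template<typename T> struct Outer {
///     template<typename U> struct Inner { Inner(U, T); };
///     template<typename U> Inner(U, T) -> Inner<U>;
///   };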
Decl *TemplateDeclInstantiator::VisitFunctionDecl(
FunctionDecl *D, TemplateParameterList *TemplateParams,
RewriteKind FunctionRewriteKind) {
// Check whether there is already a function template specialization for
// this declaration.
FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate();
if (FunctionTemplate && !TemplateParams) {
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
void *InsertPos = nullptr;
FunctionDecl *SpecFunc
= FunctionTemplate->findSpecialization(Innermost, InsertPos);
// If we already have a function template specialization, return it.
if (SpecFunc)
return SpecFunc;
}
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
bool MergeWithParentScope = (TemplateParams != nullptr) ||
Owner->isFunctionOrMethod() ||
!(isa<Decl>(Owner) &&
cast<Decl>(Owner)->isDefinedOutsideFunctionOrMethod());
LocalInstantiationScope Scope(SemaRef, MergeWithParentScope);
ExplicitSpecifier InstantiatedExplicitSpecifier;
if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
InstantiatedExplicitSpecifier = instantiateExplicitSpecifier(
SemaRef, TemplateArgs, DGuide->getExplicitSpecifier(), DGuide);
if (InstantiatedExplicitSpecifier.isInvalid())
return nullptr;
}
SmallVector<ParmVarDecl *, 4> Params;
TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
if (!TInfo)
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
if (TemplateParams && TemplateParams->size()) {
auto *LastParam =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
if (LastParam && LastParam->isImplicit() &&
LastParam->hasTypeConstraint()) {
// In abbreviated templates, the type-constraints of invented template
// type parameters are instantiated with the function type, invalidating
// the TemplateParameterList which relied on the template type parameter
// not having a type constraint. Recreate the TemplateParameterList with
// the updated parameter list.
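// For example (an illustrative sketch; 'C' stands for a placeholder
// concept):
//
//   template<typename T> struct S {
//     friend void f(C auto x, T y);   // instantiating S<T> instantiates the
//   };                                // constraint on f's invented parameter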
TemplateParams = TemplateParameterList::Create(
SemaRef.Context, TemplateParams->getTemplateLoc(),
TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
}
}
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
// FIXME: Concepts: Do not substitute into constraint expressions
Expr *TrailingRequiresClause = D->getTrailingRequiresClause();
if (TrailingRequiresClause) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
TemplateArgs);
if (SubstRC.isInvalid())
return nullptr;
TrailingRequiresClause = SubstRC.get();
if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause))
return nullptr;
}
// If we're instantiating a local function declaration, put the result
// in the enclosing namespace; otherwise we need to find the instantiated
// context.
DeclContext *DC;
if (D->isLocalExternDecl()) {
DC = Owner;
SemaRef.adjustContextForLocalExternDecl(DC);
} else if (isFriend && QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (!DC) return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(),
TemplateArgs);
}
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
if (FunctionRewriteKind != RewriteKind::None)
adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo);
FunctionDecl *Function;
if (auto *DGuide = dyn_cast<CXXDeductionGuideDecl>(D)) {
Function = CXXDeductionGuideDecl::Create(
SemaRef.Context, DC, D->getInnerLocStart(),
InstantiatedExplicitSpecifier, NameInfo, T, TInfo,
D->getSourceRange().getEnd());
if (DGuide->isCopyDeductionCandidate())
cast<CXXDeductionGuideDecl>(Function)->setIsCopyDeductionCandidate();
Function->setAccess(D->getAccess());
} else {
Function = FunctionDecl::Create(
SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo,
D->getCanonicalDecl()->getStorageClass(), D->isInlineSpecified(),
D->hasWrittenPrototype(), D->getConstexprKind(),
TrailingRequiresClause);
Function->setRangeEnd(D->getSourceRange().getEnd());
}
if (D->isInlined())
Function->setImplicitlyInline();
if (QualifierLoc)
Function->setQualifierInfo(QualifierLoc);
if (D->isLocalExternDecl())
Function->setLocalExternDecl();
DeclContext *LexicalDC = Owner;
if (!isFriend && D->isOutOfLine() && !D->isLocalExternDecl()) {
assert(D->getDeclContext()->isFileContext());
LexicalDC = D->getDeclContext();
}
Function->setLexicalDeclContext(LexicalDC);
// Attach the parameters
for (unsigned P = 0; P < Params.size(); ++P)
if (Params[P])
Params[P]->setOwningFunction(Function);
Function->setParams(Params);
if (TrailingRequiresClause)
Function->setTrailingRequiresClause(TrailingRequiresClause);
if (TemplateParams) {
// Our resulting instantiation is actually a function template, since we
// are substituting only the outer template parameters. For example, given
//
// template<typename T>
// struct X {
// template<typename U> friend void f(T, U);
// };
//
// X<int> x;
//
// We are instantiating the friend function template "f" within X<int>,
// which means substituting int for T, but leaving "f" as a friend function
// template.
// Build the function template itself.
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, DC,
Function->getLocation(),
Function->getDeclName(),
TemplateParams, Function);
Function->setDescribedFunctionTemplate(FunctionTemplate);
FunctionTemplate->setLexicalDeclContext(LexicalDC);
if (isFriend && D->isThisDeclarationADefinition()) {
FunctionTemplate->setInstantiatedFromMemberTemplate(
D->getDescribedFunctionTemplate());
}
} else if (FunctionTemplate) {
// Record this function template specialization.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Function->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
Innermost),
/*InsertPos=*/nullptr);
} else if (isFriend && D->isThisDeclarationADefinition()) {
// Do not connect the friend to the template unless it's actually a
// definition. We don't want non-template functions to be marked as being
// template instantiations.
Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
}
if (isFriend) {
Function->setObjectOfFriendDecl();
if (FunctionTemplateDecl *FT = Function->getDescribedFunctionTemplate())
FT->setObjectOfFriendDecl();
}
if (InitFunctionInstantiation(Function, D))
Function->setInvalidDecl();
bool IsExplicitSpecialization = false;
LookupResult Previous(
SemaRef, Function->getDeclName(), SourceLocation(),
D->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage
: Sema::LookupOrdinaryName,
D->isLocalExternDecl() ? Sema::ForExternalRedeclaration
: SemaRef.forRedeclarationInCurContext());
if (DependentFunctionTemplateSpecializationInfo *Info
= D->getDependentSpecializationInfo()) {
assert(isFriend && "non-friend has dependent specialization info?");
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
// Map the candidate templates to their instantiations.
for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) {
Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(),
Info->getTemplate(I),
TemplateArgs);
if (!Temp) return nullptr;
Previous.addDecl(cast<FunctionTemplateDecl>(Temp));
}
if (SemaRef.CheckFunctionTemplateSpecialization(Function,
&ExplicitArgs,
Previous))
Function->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (const ASTTemplateArgumentListInfo *Info =
D->getTemplateSpecializationArgsAsWritten()) {
// The name of this function was written as a template-id.
SemaRef.LookupQualifiedName(Previous, DC);
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Function,
&ExplicitArgs,
Previous))
Function->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (TemplateParams || !FunctionTemplate) {
// Look only into the namespace where the friend would be declared to
// find a previous declaration. This is the innermost enclosing namespace,
// as described in ActOnFriendFunctionDecl.
SemaRef.LookupQualifiedName(Previous, DC->getRedeclContext());
// In C++, the previous declaration we find might be a tag type
// (class or enum). In this case, the new declaration will hide the
// tag type. Note that this does not apply if we're declaring a
// typedef (C++ [dcl.typedef]p4).
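// For example (illustrative): if the enclosing namespace already contains
//
//   struct f {};
//
// an instantiated friend 'void f(int)' hides that tag name rather than
// conflicting with it.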
if (Previous.isSingleTagDecl())
Previous.clear();
// Filter out previous declarations that don't match the scope. The only
// effect this has is to remove declarations found in inline namespaces
// for friend declarations with unqualified names.
SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr,
/*ConsiderLinkage*/ true,
QualifierLoc.hasQualifier());
}
SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous,
IsExplicitSpecialization);
// Check the template parameter list against the previous declaration. The
// goal here is to pick up default arguments added since the friend was
// declared; we know the template parameter lists match, since otherwise
// we would not have picked this template as the previous declaration.
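// For example (an illustrative sketch):
//
//   template<typename T> struct A {
//     template<typename U> friend void g(T, U);
//   };
//   template<typename U = int> void g(long, U);   // default added later
//   A<long> a;   // the instantiated friend picks up U's default argument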
if (isFriend && TemplateParams && FunctionTemplate->getPreviousDecl()) {
SemaRef.CheckTemplateParameterList(
TemplateParams,
FunctionTemplate->getPreviousDecl()->getTemplateParameters(),
Function->isThisDeclarationADefinition()
? Sema::TPC_FriendFunctionTemplateDefinition
: Sema::TPC_FriendFunctionTemplate);
}
// If we're introducing a friend definition after the first use, trigger
// instantiation.
// FIXME: If this is a friend function template definition, we should check
// to see if any specializations have been used.
if (isFriend && D->isThisDeclarationADefinition() && Function->isUsed(false)) {
if (MemberSpecializationInfo *MSInfo =
Function->getMemberSpecializationInfo()) {
if (MSInfo->getPointOfInstantiation().isInvalid()) {
SourceLocation Loc = D->getLocation(); // FIXME
MSInfo->setPointOfInstantiation(Loc);
SemaRef.PendingLocalImplicitInstantiations.push_back(
std::make_pair(Function, Loc));
}
}
}
if (D->isExplicitlyDefaulted()) {
if (SubstDefaultedFunction(Function, D))
return nullptr;
}
if (D->isDeleted())
SemaRef.SetDeclDeleted(Function, D->getLocation());
NamedDecl *PrincipalDecl =
(TemplateParams ? cast<NamedDecl>(FunctionTemplate) : Function);
// If this declaration lives in a different context from its lexical context,
// add it to the corresponding lookup table.
if (isFriend ||
(Function->isLocalExternDecl() && !Function->getPreviousDecl()))
DC->makeDeclVisibleInContext(PrincipalDecl);
if (Function->isOverloadedOperator() && !DC->isRecord() &&
PrincipalDecl->isInIdentifierNamespace(Decl::IDNS_Ordinary))
PrincipalDecl->setNonMemberOperator();
return Function;
}
Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
CXXMethodDecl *D, TemplateParameterList *TemplateParams,
Optional<const ASTTemplateArgumentListInfo *> ClassScopeSpecializationArgs,
RewriteKind FunctionRewriteKind) {
FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate();
if (FunctionTemplate && !TemplateParams) {
// We are creating a function template specialization from a function
// template. Check whether there is already a function template
// specialization for this particular set of template arguments.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
void *InsertPos = nullptr;
FunctionDecl *SpecFunc
= FunctionTemplate->findSpecialization(Innermost, InsertPos);
// If we already have a function template specialization, return it.
if (SpecFunc)
return SpecFunc;
}
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
bool MergeWithParentScope = (TemplateParams != nullptr) ||
!(isa<Decl>(Owner) &&
cast<Decl>(Owner)->isDefinedOutsideFunctionOrMethod());
LocalInstantiationScope Scope(SemaRef, MergeWithParentScope);
// Instantiate enclosing template arguments for friends.
SmallVector<TemplateParameterList *, 4> TempParamLists;
unsigned NumTempParamLists = 0;
if (isFriend && (NumTempParamLists = D->getNumTemplateParameterLists())) {
TempParamLists.resize(NumTempParamLists);
for (unsigned I = 0; I != NumTempParamLists; ++I) {
TemplateParameterList *TempParams = D->getTemplateParameterList(I);
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
TempParamLists[I] = InstParams;
}
}
ExplicitSpecifier InstantiatedExplicitSpecifier =
instantiateExplicitSpecifier(SemaRef, TemplateArgs,
ExplicitSpecifier::getFromDecl(D), D);
if (InstantiatedExplicitSpecifier.isInvalid())
return nullptr;
// Implicit destructors/constructors created for local classes in
// DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI.
// Unfortunately there isn't enough context in those functions to
// conditionally populate the TSI without breaking non-template related use
// cases. Populate TSIs prior to calling SubstFunctionType to make sure we get
// a proper transformation.
if (cast<CXXRecordDecl>(D->getParent())->isLambda() &&
!D->getTypeSourceInfo() &&
isa<CXXConstructorDecl, CXXDestructorDecl>(D)) {
TypeSourceInfo *TSI =
SemaRef.Context.getTrivialTypeSourceInfo(D->getType());
D->setTypeSourceInfo(TSI);
}
SmallVector<ParmVarDecl *, 4> Params;
TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
if (!TInfo)
return nullptr;
QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
if (TemplateParams && TemplateParams->size()) {
auto *LastParam =
dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
if (LastParam && LastParam->isImplicit() &&
LastParam->hasTypeConstraint()) {
// In abbreviated templates, the type-constraints of invented template
// type parameters are instantiated with the function type, invalidating
// the TemplateParameterList which relied on the template type parameter
// not having a type constraint. Recreate the TemplateParameterList with
// the updated parameter list.
TemplateParams = TemplateParameterList::Create(
SemaRef.Context, TemplateParams->getTemplateLoc(),
TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
}
}
NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
TemplateArgs);
if (!QualifierLoc)
return nullptr;
}
// FIXME: Concepts: Do not substitute into constraint expressions
Expr *TrailingRequiresClause = D->getTrailingRequiresClause();
if (TrailingRequiresClause) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext,
D->getMethodQualifiers(), ThisContext);
ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
TemplateArgs);
if (SubstRC.isInvalid())
return nullptr;
TrailingRequiresClause = SubstRC.get();
if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause))
return nullptr;
}
DeclContext *DC = Owner;
if (isFriend) {
if (QualifierLoc) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DC = SemaRef.computeDeclContext(SS);
if (DC && SemaRef.RequireCompleteDeclContext(SS, DC))
return nullptr;
} else {
DC = SemaRef.FindInstantiatedContext(D->getLocation(),
D->getDeclContext(),
TemplateArgs);
}
if (!DC) return nullptr;
}
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
if (FunctionRewriteKind != RewriteKind::None)
adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo);
// Build the instantiated method declaration.
CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
CXXMethodDecl *Method = nullptr;
SourceLocation StartLoc = D->getInnerLocStart();
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
Method = CXXConstructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
InstantiatedExplicitSpecifier, Constructor->isInlineSpecified(), false,
Constructor->getConstexprKind(), InheritedConstructor(),
TrailingRequiresClause);
Method->setRangeEnd(Constructor->getEndLoc());
} else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(D)) {
Method = CXXDestructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(),
TrailingRequiresClause);
Method->setRangeEnd(Destructor->getEndLoc());
Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName(
SemaRef.Context.getCanonicalType(
SemaRef.Context.getTypeDeclType(Record))));
} else if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(D)) {
Method = CXXConversionDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier,
Conversion->getConstexprKind(), Conversion->getEndLoc(),
TrailingRequiresClause);
} else {
StorageClass SC = D->isStatic() ? SC_Static : SC_None;
Method = CXXMethodDecl::Create(SemaRef.Context, Record, StartLoc, NameInfo,
T, TInfo, SC, D->isInlineSpecified(),
D->getConstexprKind(), D->getEndLoc(),
TrailingRequiresClause);
}
if (D->isInlined())
Method->setImplicitlyInline();
if (QualifierLoc)
Method->setQualifierInfo(QualifierLoc);
if (TemplateParams) {
// Our resulting instantiation is actually a function template, since we
// are substituting only the outer template parameters. For example, given
//
// template<typename T>
// struct X {
// template<typename U> void f(T, U);
// };
//
// X<int> x;
//
// We are instantiating the member template "f" within X<int>, which means
// substituting int for T, but leaving "f" as a member function template.
// Build the function template itself.
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, Record,
Method->getLocation(),
Method->getDeclName(),
TemplateParams, Method);
if (isFriend) {
FunctionTemplate->setLexicalDeclContext(Owner);
FunctionTemplate->setObjectOfFriendDecl();
} else if (D->isOutOfLine())
FunctionTemplate->setLexicalDeclContext(D->getLexicalDeclContext());
Method->setDescribedFunctionTemplate(FunctionTemplate);
} else if (FunctionTemplate) {
// Record this function template specialization.
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Method->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
Innermost),
/*InsertPos=*/nullptr);
} else if (!isFriend) {
// Record that this is an instantiation of a member function.
Method->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation);
}
// If we are instantiating a member function defined
// out-of-line, the instantiation will have the same lexical
// context (which will be a namespace scope) as the template.
if (isFriend) {
if (NumTempParamLists)
Method->setTemplateParameterListsInfo(
SemaRef.Context,
llvm::makeArrayRef(TempParamLists.data(), NumTempParamLists));
Method->setLexicalDeclContext(Owner);
Method->setObjectOfFriendDecl();
} else if (D->isOutOfLine())
Method->setLexicalDeclContext(D->getLexicalDeclContext());
// Attach the parameters
for (unsigned P = 0; P < Params.size(); ++P)
Params[P]->setOwningFunction(Method);
Method->setParams(Params);
if (InitMethodInstantiation(Method, D))
Method->setInvalidDecl();
LookupResult Previous(SemaRef, NameInfo, Sema::LookupOrdinaryName,
Sema::ForExternalRedeclaration);
bool IsExplicitSpecialization = false;
// If the name of this function was written as a template-id, instantiate
// the explicit template arguments.
if (DependentFunctionTemplateSpecializationInfo *Info
= D->getDependentSpecializationInfo()) {
assert(isFriend && "non-friend has dependent specialization info?");
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
// Map the candidate templates to their instantiations.
for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) {
Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(),
Info->getTemplate(I),
TemplateArgs);
if (!Temp) return nullptr;
Previous.addDecl(cast<FunctionTemplateDecl>(Temp));
}
if (SemaRef.CheckFunctionTemplateSpecialization(Method,
&ExplicitArgs,
Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (const ASTTemplateArgumentListInfo *Info =
ClassScopeSpecializationArgs.getValueOr(
D->getTemplateSpecializationArgsAsWritten())) {
SemaRef.LookupQualifiedName(Previous, DC);
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
ExplicitArgs, TemplateArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Method,
&ExplicitArgs,
Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (ClassScopeSpecializationArgs) {
// Class-scope explicit specialization written without explicit template
// arguments.
SemaRef.LookupQualifiedName(Previous, DC);
if (SemaRef.CheckFunctionTemplateSpecialization(Method, nullptr, Previous))
Method->setInvalidDecl();
IsExplicitSpecialization = true;
} else if (!FunctionTemplate || TemplateParams || isFriend) {
SemaRef.LookupQualifiedName(Previous, Record);
// In C++, the previous declaration we find might be a tag type
// (class or enum). In this case, the new declaration will hide the
// tag type. Note that this does not apply if we're declaring a
// typedef (C++ [dcl.typedef]p4).
if (Previous.isSingleTagDecl())
Previous.clear();
}
SemaRef.CheckFunctionDeclaration(nullptr, Method, Previous,
IsExplicitSpecialization);
if (D->isPure())
SemaRef.CheckPureMethod(Method, SourceRange());
// Propagate access. For a non-friend declaration, the access is
// whatever we're propagating from. For a friend, it should be the
// previous declaration we just found.
if (isFriend && Method->getPreviousDecl())
Method->setAccess(Method->getPreviousDecl()->getAccess());
else
Method->setAccess(D->getAccess());
if (FunctionTemplate)
FunctionTemplate->setAccess(Method->getAccess());
SemaRef.CheckOverrideControl(Method);
// If a function is defined as defaulted or deleted, mark it as such now.
if (D->isExplicitlyDefaulted()) {
if (SubstDefaultedFunction(Method, D))
return nullptr;
}
if (D->isDeletedAsWritten())
SemaRef.SetDeclDeleted(Method, Method->getLocation());
// If this is an explicit specialization, mark the implicitly-instantiated
// template specialization as being an explicit specialization too.
// FIXME: Is this necessary?
if (IsExplicitSpecialization && !isFriend)
SemaRef.CompleteMemberSpecialization(Method, Previous);
// If there's a function template, let our caller handle it.
if (FunctionTemplate) {
// do nothing
// Don't hide a (potentially) valid declaration with an invalid one.
} else if (Method->isInvalidDecl() && !Previous.empty()) {
// do nothing
// Otherwise, check access to friends and make them visible.
} else if (isFriend) {
// We only need to re-check access for methods which we didn't
// manage to match during parsing.
if (!D->getPreviousDecl())
SemaRef.CheckFriendAccess(Method);
Record->makeDeclVisibleInContext(Method);
// Otherwise, add the declaration. We don't need to do this for
// class-scope specializations because we'll have matched them with
// the appropriate template.
} else {
Owner->addDecl(Method);
}
// PR17480: Honor the used attribute to instantiate member function
// definitions
if (Method->hasAttr<UsedAttr>()) {
if (const auto *A = dyn_cast<CXXRecordDecl>(Owner)) {
SourceLocation Loc;
if (const MemberSpecializationInfo *MSInfo =
A->getMemberSpecializationInfo())
Loc = MSInfo->getPointOfInstantiation();
else if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(A))
Loc = Spec->getPointOfInstantiation();
SemaRef.MarkFunctionReferenced(Loc, Method);
}
}
return Method;
}
Decl *TemplateDeclInstantiator::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitCXXConversionDecl(CXXConversionDecl *D) {
return VisitCXXMethodDecl(D);
}
Decl *TemplateDeclInstantiator::VisitParmVarDecl(ParmVarDecl *D) {
return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, None,
/*ExpectParameterPack=*/ false);
}
Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
TemplateTypeParmDecl *D) {
assert(D->getTypeForDecl()->isTemplateTypeParmType());
Optional<unsigned> NumExpanded;
if (const TypeConstraint *TC = D->getTypeConstraint()) {
if (D->isPackExpansion() && !D->isExpandedParameterPack()) {
assert(TC->getTemplateArgsAsWritten() &&
"type parameter can only be an expansion when explicit arguments "
"are specified");
// The template type parameter pack's type is a pack expansion of types.
// Determine whether we need to expand this parameter pack into separate
// types.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
for (auto &ArgLoc : TC->getTemplateArgsAsWritten()->arguments())
SemaRef.collectUnexpandedParameterPacks(ArgLoc, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
if (SemaRef.CheckParameterPacksForExpansion(
cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
->getEllipsisLoc(),
SourceRange(TC->getConceptNameLoc(),
TC->hasExplicitTemplateArgs() ?
TC->getTemplateArgsAsWritten()->getRAngleLoc() :
TC->getConceptNameInfo().getEndLoc()),
Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpanded))
return nullptr;
}
}
TemplateTypeParmDecl *Inst = TemplateTypeParmDecl::Create(
SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getIndex(),
D->getIdentifier(), D->wasDeclaredWithTypename(), D->isParameterPack(),
D->hasTypeConstraint(), NumExpanded);
Inst->setAccess(AS_public);
Inst->setImplicit(D->isImplicit());
if (auto *TC = D->getTypeConstraint()) {
if (!D->isImplicit()) {
// Invented template parameter type constraints will be instantiated with
// the corresponding auto-typed parameter as it might reference other
// parameters.
// TODO: Concepts: do not instantiate the constraint (delayed constraint
// substitution)
const ASTTemplateArgumentListInfo *TemplArgInfo
= TC->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstArgs;
if (TemplArgInfo) {
InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstArgs, TemplateArgs))
return nullptr;
}
if (SemaRef.AttachTypeConstraint(
TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
TC->getNamedConcept(), &InstArgs, Inst,
D->isParameterPack()
? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
->getEllipsisLoc()
: SourceLocation()))
return nullptr;
}
}
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
TypeSourceInfo *InstantiatedDefaultArg =
SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs,
D->getDefaultArgumentLoc(), D->getDeclName());
if (InstantiatedDefaultArg)
Inst->setDefaultArgument(InstantiatedDefaultArg);
}
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
NonTypeTemplateParmDecl *D) {
// Substitute into the type of the non-type template parameter.
TypeLoc TL = D->getTypeSourceInfo()->getTypeLoc();
SmallVector<TypeSourceInfo *, 4> ExpandedParameterPackTypesAsWritten;
SmallVector<QualType, 4> ExpandedParameterPackTypes;
bool IsExpandedParameterPack = false;
TypeSourceInfo *DI;
QualType T;
bool Invalid = false;
if (D->isExpandedParameterPack()) {
// The non-type template parameter pack is an already-expanded pack
// expansion of types. Substitute into each of the expanded types.
ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes());
ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes());
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
TypeSourceInfo *NewDI =
SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs,
D->getLocation(), D->getDeclName());
if (!NewDI)
return nullptr;
QualType NewT =
SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
if (NewT.isNull())
return nullptr;
ExpandedParameterPackTypesAsWritten.push_back(NewDI);
ExpandedParameterPackTypes.push_back(NewT);
}
IsExpandedParameterPack = true;
DI = D->getTypeSourceInfo();
T = DI->getType();
} else if (D->isPackExpansion()) {
// The non-type template parameter pack's type is a pack expansion of types.
// Determine whether we need to expand this parameter pack into separate
// types.
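// For example (an illustrative sketch):
//
//   template<typename ...Ts> struct X {
//     template<Ts ...Vals> struct Y {};   // the type of Vals is the pack
//   };                                    // expansion 'Ts...'
//   X<int, char>::Y<1, 'a'> y;            // Vals expands to (int, char)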
PackExpansionTypeLoc Expansion = TL.castAs<PackExpansionTypeLoc>();
TypeLoc Pattern = Expansion.getPatternLoc();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions
= Expansion.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(Expansion.getEllipsisLoc(),
Pattern.getSourceRange(),
Unexpanded,
TemplateArgs,
Expand, RetainExpansion,
NumExpansions))
return nullptr;
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs,
D->getLocation(),
D->getDeclName());
if (!NewDI)
return nullptr;
QualType NewT =
SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation());
if (NewT.isNull())
return nullptr;
ExpandedParameterPackTypesAsWritten.push_back(NewDI);
ExpandedParameterPackTypes.push_back(NewT);
}
// Note that we have an expanded parameter pack. The "type" of this
// expanded parameter pack is the original expansion type, but callers
// will end up using the expanded parameter pack types for type-checking.
IsExpandedParameterPack = true;
DI = D->getTypeSourceInfo();
T = DI->getType();
} else {
// We cannot fully expand the pack expansion now, so substitute into the
// pattern and create a new pack expansion type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
TypeSourceInfo *NewPattern = SemaRef.SubstType(Pattern, TemplateArgs,
D->getLocation(),
D->getDeclName());
if (!NewPattern)
return nullptr;
SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation());
DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(),
NumExpansions);
if (!DI)
return nullptr;
T = DI->getType();
}
} else {
// Simple case: substitution into a parameter that is not a parameter pack.
DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
D->getLocation(), D->getDeclName());
if (!DI)
return nullptr;
// Check that this type is acceptable for a non-type template parameter.
T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation());
if (T.isNull()) {
T = SemaRef.Context.IntTy;
Invalid = true;
}
}
NonTypeTemplateParmDecl *Param;
if (IsExpandedParameterPack)
Param = NonTypeTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes,
ExpandedParameterPackTypesAsWritten);
else
Param = NonTypeTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI);
if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc())
if (AutoLoc.isConstrained())
if (SemaRef.AttachTypeConstraint(
AutoLoc, Param,
IsExpandedParameterPack
? DI->getTypeLoc().getAs<PackExpansionTypeLoc>()
.getEllipsisLoc()
: SourceLocation()))
Invalid = true;
Param->setAccess(AS_public);
Param->setImplicit(D->isImplicit());
if (Invalid)
Param->setInvalidDecl();
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs);
if (!Value.isInvalid())
Param->setDefaultArgument(Value.get());
}
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param);
return Param;
}
static void collectUnexpandedParameterPacks(
Sema &S,
TemplateParameterList *Params,
SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
for (const auto &P : *Params) {
if (P->isTemplateParameterPack())
continue;
if (NonTypeTemplateParmDecl *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P))
S.collectUnexpandedParameterPacks(NTTP->getTypeSourceInfo()->getTypeLoc(),
Unexpanded);
if (TemplateTemplateParmDecl *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
collectUnexpandedParameterPacks(S, TTP->getTemplateParameters(),
Unexpanded);
}
}
Decl *
TemplateDeclInstantiator::VisitTemplateTemplateParmDecl(
TemplateTemplateParmDecl *D) {
// Instantiate the template parameter list of the template template parameter.
TemplateParameterList *TempParams = D->getTemplateParameters();
TemplateParameterList *InstParams;
SmallVector<TemplateParameterList*, 8> ExpandedParams;
bool IsExpandedParameterPack = false;
if (D->isExpandedParameterPack()) {
// The template template parameter pack is an already-expanded pack
// expansion of template parameters. Substitute into each of the expanded
// parameters.
ExpandedParams.reserve(D->getNumExpansionTemplateParameters());
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I) {
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *Expansion =
SubstTemplateParams(D->getExpansionTemplateParameters(I));
if (!Expansion)
return nullptr;
ExpandedParams.push_back(Expansion);
}
IsExpandedParameterPack = true;
InstParams = TempParams;
} else if (D->isPackExpansion()) {
// The template template parameter pack expands to a pack of template
// template parameters. Determine whether we need to expand this parameter
// pack into separate parameters.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
collectUnexpandedParameterPacks(SemaRef, D->getTemplateParameters(),
Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(D->getLocation(),
TempParams->getSourceRange(),
Unexpanded,
TemplateArgs,
Expand, RetainExpansion,
NumExpansions))
return nullptr;
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
LocalInstantiationScope Scope(SemaRef);
TemplateParameterList *Expansion = SubstTemplateParams(TempParams);
if (!Expansion)
return nullptr;
ExpandedParams.push_back(Expansion);
}
// Note that we have an expanded parameter pack. The "type" of this
// expanded parameter pack is the original expansion type, but callers
// will end up using the expanded parameter pack types for type-checking.
IsExpandedParameterPack = true;
InstParams = TempParams;
} else {
// We cannot fully expand the pack expansion now, so just substitute
// into the pattern.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
LocalInstantiationScope Scope(SemaRef);
InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
}
} else {
// Perform the actual substitution of template parameters within a new,
// local instantiation scope.
LocalInstantiationScope Scope(SemaRef);
InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
}
// Build the template template parameter.
TemplateTemplateParmDecl *Param;
if (IsExpandedParameterPack)
Param = TemplateTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->getIdentifier(), InstParams, ExpandedParams);
else
Param = TemplateTemplateParmDecl::Create(
SemaRef.Context, Owner, D->getLocation(),
D->getDepth() - TemplateArgs.getNumSubstitutedLevels(),
D->getPosition(), D->isParameterPack(), D->getIdentifier(), InstParams);
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
NestedNameSpecifierLoc QualifierLoc =
D->getDefaultArgument().getTemplateQualifierLoc();
QualifierLoc =
SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs);
TemplateName TName = SemaRef.SubstTemplateName(
QualifierLoc, D->getDefaultArgument().getArgument().getAsTemplate(),
D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs);
if (!TName.isNull())
Param->setDefaultArgument(
SemaRef.Context,
TemplateArgumentLoc(SemaRef.Context, TemplateArgument(TName),
D->getDefaultArgument().getTemplateQualifierLoc(),
D->getDefaultArgument().getTemplateNameLoc()));
}
Param->setAccess(AS_public);
Param->setImplicit(D->isImplicit());
// Introduce this template parameter's instantiation into the instantiation
// scope.
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param);
return Param;
}
Decl *TemplateDeclInstantiator::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
// Using directives are never dependent (and never contain any types or
// expressions), so they require no explicit instantiation work.
UsingDirectiveDecl *Inst
= UsingDirectiveDecl::Create(SemaRef.Context, Owner, D->getLocation(),
D->getNamespaceKeyLocation(),
D->getQualifierLoc(),
D->getIdentLocation(),
D->getNominatedNamespace(),
D->getCommonAncestor());
// Add the using directive to its declaration context
// only if this is not a function or method.
if (!Owner->isFunctionOrMethod())
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitBaseUsingDecls(BaseUsingDecl *D,
BaseUsingDecl *Inst,
LookupResult *Lookup) {
bool isFunctionScope = Owner->isFunctionOrMethod();
for (auto *Shadow : D->shadows()) {
// FIXME: UsingShadowDecl doesn't preserve its immediate target, so
// reconstruct it in the case where it matters. Hm, can we extract it from
// the DeclSpec when parsing and save it in the UsingDecl itself?
NamedDecl *OldTarget = Shadow->getTargetDecl();
if (auto *CUSD = dyn_cast<ConstructorUsingShadowDecl>(Shadow))
if (auto *BaseShadow = CUSD->getNominatedBaseClassShadowDecl())
OldTarget = BaseShadow;
NamedDecl *InstTarget = nullptr;
if (auto *EmptyD =
dyn_cast<UnresolvedUsingIfExistsDecl>(Shadow->getTargetDecl())) {
InstTarget = UnresolvedUsingIfExistsDecl::Create(
SemaRef.Context, Owner, EmptyD->getLocation(), EmptyD->getDeclName());
} else {
InstTarget = cast_or_null<NamedDecl>(SemaRef.FindInstantiatedDecl(
Shadow->getLocation(), OldTarget, TemplateArgs));
}
if (!InstTarget)
return nullptr;
UsingShadowDecl *PrevDecl = nullptr;
if (Lookup &&
SemaRef.CheckUsingShadowDecl(Inst, InstTarget, *Lookup, PrevDecl))
continue;
if (UsingShadowDecl *OldPrev = getPreviousDeclForInstantiation(Shadow))
PrevDecl = cast_or_null<UsingShadowDecl>(SemaRef.FindInstantiatedDecl(
Shadow->getLocation(), OldPrev, TemplateArgs));
UsingShadowDecl *InstShadow = SemaRef.BuildUsingShadowDecl(
/*Scope*/ nullptr, Inst, InstTarget, PrevDecl);
SemaRef.Context.setInstantiatedFromUsingShadowDecl(InstShadow, Shadow);
if (isFunctionScope)
SemaRef.CurrentInstantiationScope->InstantiatedLocal(Shadow, InstShadow);
}
return Inst;
}
Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) {
// The nested name specifier may be dependent, for example
// template <typename T> struct t {
// struct s1 { T f1(); };
// struct s2 : s1 { using s1::f1; };
// };
// template struct t<int>;
// Here, in using s1::f1, s1 refers to t<T>::s1;
// we need to substitute for t<int>::s1.
NestedNameSpecifierLoc QualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(),
TemplateArgs);
if (!QualifierLoc)
return nullptr;
// For an inheriting constructor declaration, the name of the using
// declaration is the name of a constructor in this class, not in the
// base class.
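// For example (an illustrative sketch):
//
//   struct B { B(int); };
//   template<typename T> struct D : B {
//     using B::B;   // the instantiated using-declaration names D<T>'s
//   };              // constructors, not B's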
DeclarationNameInfo NameInfo = D->getNameInfo();
if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
if (auto *RD = dyn_cast<CXXRecordDecl>(SemaRef.CurContext))
NameInfo.setName(SemaRef.Context.DeclarationNames.getCXXConstructorName(
SemaRef.Context.getCanonicalType(SemaRef.Context.getRecordType(RD))));
// We only need to do redeclaration lookups if we're in a class scope (in
// fact, it's not really even possible in non-class scopes).
bool CheckRedeclaration = Owner->isRecord();
LookupResult Prev(SemaRef, NameInfo, Sema::LookupUsingDeclName,
Sema::ForVisibleRedeclaration);
UsingDecl *NewUD = UsingDecl::Create(SemaRef.Context, Owner,
D->getUsingLoc(),
QualifierLoc,
NameInfo,
D->hasTypename());
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (CheckRedeclaration) {
Prev.setHideTags(false);
SemaRef.LookupQualifiedName(Prev, Owner);
// Check for invalid redeclarations.
if (SemaRef.CheckUsingDeclRedeclaration(D->getUsingLoc(),
D->hasTypename(), SS,
D->getLocation(), Prev))
NewUD->setInvalidDecl();
}
if (!NewUD->isInvalidDecl() &&
SemaRef.CheckUsingDeclQualifier(D->getUsingLoc(), D->hasTypename(), SS,
NameInfo, D->getLocation(), nullptr, D))
NewUD->setInvalidDecl();
SemaRef.Context.setInstantiatedFromUsingDecl(NewUD, D);
NewUD->setAccess(D->getAccess());
Owner->addDecl(NewUD);
// Don't process the shadow decls for an invalid decl.
if (NewUD->isInvalidDecl())
return NewUD;
// If the using scope was dependent, or we had dependent bases, we need to
// recheck the inheritance
if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
SemaRef.CheckInheritingConstructorUsingDecl(NewUD);
return VisitBaseUsingDecls(D, NewUD, CheckRedeclaration ? &Prev : nullptr);
}
Decl *TemplateDeclInstantiator::VisitUsingEnumDecl(UsingEnumDecl *D) {
// Cannot be a dependent type, but still could be an instantiation
EnumDecl *EnumD = cast_or_null<EnumDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), D->getEnumDecl(), TemplateArgs));
if (SemaRef.RequireCompleteEnumDecl(EnumD, EnumD->getLocation()))
return nullptr;
UsingEnumDecl *NewUD =
UsingEnumDecl::Create(SemaRef.Context, Owner, D->getUsingLoc(),
D->getEnumLoc(), D->getLocation(), EnumD);
SemaRef.Context.setInstantiatedFromUsingEnumDecl(NewUD, D);
NewUD->setAccess(D->getAccess());
Owner->addDecl(NewUD);
// Don't process the shadow decls for an invalid decl.
if (NewUD->isInvalidDecl())
return NewUD;
// We don't have to recheck for duplication of the UsingEnumDecl itself, as it
// cannot be dependent, and will therefore have been checked during template
// definition.
return VisitBaseUsingDecls(D, NewUD, nullptr);
}
Decl *TemplateDeclInstantiator::VisitUsingShadowDecl(UsingShadowDecl *D) {
// Ignore these; we handle them in bulk when processing the UsingDecl.
return nullptr;
}
Decl *TemplateDeclInstantiator::VisitConstructorUsingShadowDecl(
ConstructorUsingShadowDecl *D) {
// Ignore these; we handle them in bulk when processing the UsingDecl.
return nullptr;
}
template <typename T>
Decl *TemplateDeclInstantiator::instantiateUnresolvedUsingDecl(
T *D, bool InstantiatingPackElement) {
// If this is a pack expansion, expand it now.
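// For example (an illustrative sketch):
//
//   template<typename ...Bases> struct D : Bases... {
//     using Bases::operator()...;   // expands into one using-declaration
//   };                              // per base class (C++17)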
if (D->isPackExpansion() && !InstantiatingPackElement) {
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(D->getQualifierLoc(), Unexpanded);
SemaRef.collectUnexpandedParameterPacks(D->getNameInfo(), Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (SemaRef.CheckParameterPacksForExpansion(
D->getEllipsisLoc(), D->getSourceRange(), Unexpanded, TemplateArgs,
Expand, RetainExpansion, NumExpansions))
return nullptr;
// This declaration cannot appear within a function template signature,
// so we can't have a partial argument list for a parameter pack.
assert(!RetainExpansion &&
"should never need to retain an expansion for UsingPackDecl");
if (!Expand) {
// We cannot fully expand the pack expansion now, so substitute into the
// pattern and create a new pack expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1);
return instantiateUnresolvedUsingDecl(D, true);
}
// Within a function, we don't have any normal way to check for conflicts
// between shadow declarations from different using declarations in the
// same pack expansion, but this is always ill-formed because all expansions
// must produce (conflicting) enumerators.
//
// Sadly we can't just reject this in the template definition because it
// could be valid if the pack is empty or has exactly one expansion.
if (D->getDeclContext()->isFunctionOrMethod() && *NumExpansions > 1) {
SemaRef.Diag(D->getEllipsisLoc(),
diag::err_using_decl_redeclaration_expansion);
return nullptr;
}
// Instantiate the slices of this pack and build a UsingPackDecl.
SmallVector<NamedDecl*, 8> Expansions;
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
Decl *Slice = instantiateUnresolvedUsingDecl(D, true);
if (!Slice)
return nullptr;
// Note that we can still get unresolved using declarations here, if we
// had arguments for all packs but the pattern also contained other
// template arguments (this only happens during partial substitution, eg
// into the body of a generic lambda in a function template).
Expansions.push_back(cast<NamedDecl>(Slice));
}
auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions);
if (isDeclWithinFunction(D))
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD);
return NewD;
}
UnresolvedUsingTypenameDecl *TD = dyn_cast<UnresolvedUsingTypenameDecl>(D);
SourceLocation TypenameLoc = TD ? TD->getTypenameLoc() : SourceLocation();
NestedNameSpecifierLoc QualifierLoc
= SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(),
TemplateArgs);
if (!QualifierLoc)
return nullptr;
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
DeclarationNameInfo NameInfo
= SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs);
// Produce a pack expansion only if we're not instantiating a particular
// slice of a pack expansion.
bool InstantiatingSlice = D->getEllipsisLoc().isValid() &&
SemaRef.ArgumentPackSubstitutionIndex != -1;
SourceLocation EllipsisLoc =
InstantiatingSlice ? SourceLocation() : D->getEllipsisLoc();
bool IsUsingIfExists = D->template hasAttr<UsingIfExistsAttr>();
NamedDecl *UD = SemaRef.BuildUsingDeclaration(
/*Scope*/ nullptr, D->getAccess(), D->getUsingLoc(),
/*HasTypename*/ TD, TypenameLoc, SS, NameInfo, EllipsisLoc,
ParsedAttributesView(),
/*IsInstantiation*/ true, IsUsingIfExists);
if (UD) {
SemaRef.InstantiateAttrs(TemplateArgs, D, UD);
SemaRef.Context.setInstantiatedFromUsingDecl(UD, D);
}
return UD;
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
return instantiateUnresolvedUsingDecl(D);
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingValueDecl(
UnresolvedUsingValueDecl *D) {
return instantiateUnresolvedUsingDecl(D);
}
Decl *TemplateDeclInstantiator::VisitUnresolvedUsingIfExistsDecl(
UnresolvedUsingIfExistsDecl *D) {
llvm_unreachable("referring to unresolved decl out of UsingShadowDecl");
}
Decl *TemplateDeclInstantiator::VisitUsingPackDecl(UsingPackDecl *D) {
SmallVector<NamedDecl*, 8> Expansions;
for (auto *UD : D->expansions()) {
if (NamedDecl *NewUD =
SemaRef.FindInstantiatedDecl(D->getLocation(), UD, TemplateArgs))
Expansions.push_back(NewUD);
else
return nullptr;
}
auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions);
if (isDeclWithinFunction(D))
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD);
return NewD;
}
Decl *TemplateDeclInstantiator::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *Decl) {
CXXMethodDecl *OldFD = Decl->getSpecialization();
return cast_or_null<CXXMethodDecl>(
VisitCXXMethodDecl(OldFD, nullptr, Decl->getTemplateArgsAsWritten()));
}
Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl(
OMPThreadPrivateDecl *D) {
SmallVector<Expr *, 5> Vars;
for (auto *I : D->varlists()) {
Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get();
assert(isa<DeclRefExpr>(Var) && "threadprivate arg is not a DeclRefExpr");
Vars.push_back(Var);
}
OMPThreadPrivateDecl *TD =
SemaRef.CheckOMPThreadPrivateDecl(D->getLocation(), Vars);
TD->setAccess(AS_public);
Owner->addDecl(TD);
return TD;
}
Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) {
SmallVector<Expr *, 5> Vars;
for (auto *I : D->varlists()) {
Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get();
assert(isa<DeclRefExpr>(Var) && "allocate arg is not a DeclRefExpr");
Vars.push_back(Var);
}
SmallVector<OMPClause *, 4> Clauses;
// Substitute the allocator clauses from the original directive.
for (OMPClause *C : D->clauselists()) {
auto *AC = cast<OMPAllocatorClause>(C);
ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs);
if (!NewE.isUsable())
continue;
OMPClause *IC = SemaRef.ActOnOpenMPAllocatorClause(
NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc());
Clauses.push_back(IC);
}
Sema::DeclGroupPtrTy Res = SemaRef.ActOnOpenMPAllocateDirective(
D->getLocation(), Vars, Clauses, Owner);
if (Res.get().isNull())
return nullptr;
return Res.get().getSingleDecl();
}
Decl *TemplateDeclInstantiator::VisitOMPRequiresDecl(OMPRequiresDecl *D) {
llvm_unreachable(
"Requires directive cannot be instantiated within a dependent context");
}
Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl(
OMPDeclareReductionDecl *D) {
// Instantiate type and check if it is allowed.
const bool RequiresInstantiation =
D->getType()->isDependentType() ||
D->getType()->isInstantiationDependentType() ||
D->getType()->containsUnexpandedParameterPack();
QualType SubstReductionType;
if (RequiresInstantiation) {
SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType(
D->getLocation(),
ParsedType::make(SemaRef.SubstType(
D->getType(), TemplateArgs, D->getLocation(), DeclarationName())));
} else {
SubstReductionType = D->getType();
}
if (SubstReductionType.isNull())
return nullptr;
Expr *Combiner = D->getCombiner();
Expr *Init = D->getInitializer();
bool IsCorrect = true;
// Create instantiated copy.
std::pair<QualType, SourceLocation> ReductionTypes[] = {
std::make_pair(SubstReductionType, D->getLocation())};
auto *PrevDeclInScope = D->getPrevDeclInScope();
if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) {
PrevDeclInScope = cast<OMPDeclareReductionDecl>(
SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope)
->get<Decl *>());
}
auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart(
/*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(),
PrevDeclInScope);
auto *NewDRD = cast<OMPDeclareReductionDecl>(DRD.get().getSingleDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD);
Expr *SubstCombiner = nullptr;
Expr *SubstInitializer = nullptr;
// Combiners instantiation sequence.
if (Combiner) {
SemaRef.ActOnOpenMPDeclareReductionCombinerStart(
/*S=*/nullptr, NewDRD);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getCombinerIn())->getDecl(),
cast<DeclRefExpr>(NewDRD->getCombinerIn())->getDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getCombinerOut())->getDecl(),
cast<DeclRefExpr>(NewDRD->getCombinerOut())->getDecl());
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(),
ThisContext);
SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get();
SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner);
}
// Initializers instantiation sequence.
if (Init) {
VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart(
/*S=*/nullptr, NewDRD);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getInitOrig())->getDecl(),
cast<DeclRefExpr>(NewDRD->getInitOrig())->getDecl());
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getInitPriv())->getDecl(),
cast<DeclRefExpr>(NewDRD->getInitPriv())->getDecl());
if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) {
SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get();
} else {
auto *OldPrivParm =
cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl());
IsCorrect = IsCorrect && OldPrivParm->hasInit();
if (IsCorrect)
SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm,
TemplateArgs);
}
SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer,
OmpPrivParm);
}
IsCorrect = IsCorrect && SubstCombiner &&
(!Init ||
(D->getInitializerKind() == OMPDeclareReductionDecl::CallInit &&
SubstInitializer) ||
(D->getInitializerKind() != OMPDeclareReductionDecl::CallInit &&
!SubstInitializer));
(void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd(
/*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl());
return NewDRD;
}
Decl *
TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) {
// Instantiate type and check if it is allowed.
const bool RequiresInstantiation =
D->getType()->isDependentType() ||
D->getType()->isInstantiationDependentType() ||
D->getType()->containsUnexpandedParameterPack();
QualType SubstMapperTy;
DeclarationName VN = D->getVarName();
if (RequiresInstantiation) {
SubstMapperTy = SemaRef.ActOnOpenMPDeclareMapperType(
D->getLocation(),
ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs,
D->getLocation(), VN)));
} else {
SubstMapperTy = D->getType();
}
if (SubstMapperTy.isNull())
return nullptr;
// Create an instantiated copy of mapper.
auto *PrevDeclInScope = D->getPrevDeclInScope();
if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) {
PrevDeclInScope = cast<OMPDeclareMapperDecl>(
SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope)
->get<Decl *>());
}
bool IsCorrect = true;
SmallVector<OMPClause *, 6> Clauses;
// Instantiate the mapper variable.
DeclarationNameInfo DirName;
SemaRef.StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName,
/*S=*/nullptr,
(*D->clauselist_begin())->getBeginLoc());
ExprResult MapperVarRef = SemaRef.ActOnOpenMPDeclareMapperDirectiveVarDecl(
/*S=*/nullptr, SubstMapperTy, D->getLocation(), VN);
SemaRef.CurrentInstantiationScope->InstantiatedLocal(
cast<DeclRefExpr>(D->getMapperVarRef())->getDecl(),
cast<DeclRefExpr>(MapperVarRef.get())->getDecl());
auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(),
ThisContext);
// Instantiate map clauses.
for (OMPClause *C : D->clauselists()) {
auto *OldC = cast<OMPMapClause>(C);
SmallVector<Expr *, 4> NewVars;
for (Expr *OE : OldC->varlists()) {
Expr *NE = SemaRef.SubstExpr(OE, TemplateArgs).get();
if (!NE) {
IsCorrect = false;
break;
}
NewVars.push_back(NE);
}
if (!IsCorrect)
break;
NestedNameSpecifierLoc NewQualifierLoc =
SemaRef.SubstNestedNameSpecifierLoc(OldC->getMapperQualifierLoc(),
TemplateArgs);
CXXScopeSpec SS;
SS.Adopt(NewQualifierLoc);
DeclarationNameInfo NewNameInfo =
SemaRef.SubstDeclarationNameInfo(OldC->getMapperIdInfo(), TemplateArgs);
OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(),
OldC->getEndLoc());
OMPClause *NewC = SemaRef.ActOnOpenMPMapClause(
OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS,
NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(),
OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs);
Clauses.push_back(NewC);
}
SemaRef.EndOpenMPDSABlock(nullptr);
if (!IsCorrect)
return nullptr;
Sema::DeclGroupPtrTy DG = SemaRef.ActOnOpenMPDeclareMapperDirective(
/*S=*/nullptr, Owner, D->getDeclName(), SubstMapperTy, D->getLocation(),
VN, D->getAccess(), MapperVarRef.get(), Clauses, PrevDeclInScope);
Decl *NewDMD = DG.get().getSingleDecl();
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDMD);
return NewDMD;
}
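// Illustrative example for VisitOMPDeclareMapperDecl above (editorial sketch,
// not part of the original source; names are invented): a dependent
// 'declare mapper' whose mapper variable and map clauses are substituted on
// instantiation, e.g.
//
//   template <class T> struct dat {
//     T d;
//   #pragma omp declare mapper(id : dat<T> x) map(x.d)
//   };
//
// Instantiating dat<double> substitutes the mapper type dat<double>, the
// mapper variable 'x', and the map clause 'map(x.d)'.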
Decl *TemplateDeclInstantiator::VisitOMPCapturedExprDecl(
OMPCapturedExprDecl * /*D*/) {
llvm_unreachable("Should not be met in templates");
}
Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D) {
return VisitFunctionDecl(D, nullptr);
}
Decl *
TemplateDeclInstantiator::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
Decl *Inst = VisitFunctionDecl(D, nullptr);
if (Inst && !D->getDescribedFunctionTemplate())
Owner->addDecl(Inst);
return Inst;
}
Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D) {
return VisitCXXMethodDecl(D, nullptr);
}
Decl *TemplateDeclInstantiator::VisitRecordDecl(RecordDecl *D) {
llvm_unreachable("There are only CXXRecordDecls in C++");
}
Decl *
TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
// As a MS extension, we permit class-scope explicit specialization
// of member class templates.
ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate();
assert(ClassTemplate->getDeclContext()->isRecord() &&
D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization &&
"can only instantiate an explicit specialization "
"for a member class template");
// Lookup the already-instantiated declaration in the instantiation
// of the class template.
ClassTemplateDecl *InstClassTemplate =
cast_or_null<ClassTemplateDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), ClassTemplate, TemplateArgs));
if (!InstClassTemplate)
return nullptr;
// Substitute into the template arguments of the class template explicit
// specialization.
TemplateSpecializationTypeLoc Loc = D->getTypeAsWritten()->getTypeLoc().
castAs<TemplateSpecializationTypeLoc>();
TemplateArgumentListInfo InstTemplateArgs(Loc.getLAngleLoc(),
Loc.getRAngleLoc());
SmallVector<TemplateArgumentLoc, 4> ArgLocs;
for (unsigned I = 0; I != Loc.getNumArgs(); ++I)
ArgLocs.push_back(Loc.getArgLoc(I));
if (SemaRef.Subst(ArgLocs.data(), ArgLocs.size(),
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// class template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(InstClassTemplate,
D->getLocation(),
InstTemplateArgs,
false,
Converted,
/*UpdateArgsWithConversion=*/true))
return nullptr;
// Figure out where to insert this class template explicit specialization
// in the member template's set of class template explicit specializations.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *PrevDecl =
InstClassTemplate->findSpecialization(Converted, InsertPos);
// Check whether we've already seen a conflicting instantiation of this
// declaration (for instance, if there was a prior implicit instantiation).
bool Ignored;
if (PrevDecl &&
SemaRef.CheckSpecializationInstantiationRedecl(D->getLocation(),
D->getSpecializationKind(),
PrevDecl,
PrevDecl->getSpecializationKind(),
PrevDecl->getPointOfInstantiation(),
Ignored))
return nullptr;
// If PrevDecl was a definition and D is also a definition, diagnose.
// This happens in cases like:
//
// template<typename T, typename U>
// struct Outer {
// template<typename X> struct Inner;
// template<> struct Inner<T> {};
// template<> struct Inner<U> {};
// };
//
// Outer<int, int> outer; // error: the explicit specializations of Inner
// // have the same signature.
if (PrevDecl && PrevDecl->getDefinition() &&
D->isThisDeclarationADefinition()) {
SemaRef.Diag(D->getLocation(), diag::err_redefinition) << PrevDecl;
SemaRef.Diag(PrevDecl->getDefinition()->getLocation(),
diag::note_previous_definition);
return nullptr;
}
// Create the class template explicit specialization declaration.
ClassTemplateSpecializationDecl *InstD =
ClassTemplateSpecializationDecl::Create(
SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(),
D->getLocation(), InstClassTemplate, Converted, PrevDecl);
// Add this explicit specialization to the set of class template
// specializations.
if (!PrevDecl)
InstClassTemplate->AddSpecialization(InstD, InsertPos);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, InstD))
return nullptr;
// Build the canonical type that describes the converted template
// arguments of the class template explicit specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
TemplateName(InstClassTemplate), Converted,
SemaRef.Context.getRecordType(InstD));
// Build the fully-sugared type for this class template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(InstClassTemplate), D->getLocation(), InstTemplateArgs,
CanonType);
InstD->setAccess(D->getAccess());
InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
InstD->setSpecializationKind(D->getSpecializationKind());
InstD->setTypeAsWritten(WrittenTy);
InstD->setExternLoc(D->getExternLoc());
InstD->setTemplateKeywordLoc(D->getTemplateKeywordLoc());
Owner->addDecl(InstD);
// Instantiate the members of the class-scope explicit specialization eagerly.
// We don't have support for lazy instantiation of an explicit specialization
// yet, and MSVC eagerly instantiates in this case.
// FIXME: This is wrong in standard C++.
if (D->isThisDeclarationADefinition() &&
SemaRef.InstantiateClass(D->getLocation(), InstD, D, TemplateArgs,
TSK_ImplicitInstantiation,
/*Complain=*/true))
return nullptr;
return InstD;
}
Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *D) {
TemplateArgumentListInfo VarTemplateArgsInfo;
VarTemplateDecl *VarTemplate = D->getSpecializedTemplate();
assert(VarTemplate &&
"A template specialization without specialized template?");
VarTemplateDecl *InstVarTemplate =
cast_or_null<VarTemplateDecl>(SemaRef.FindInstantiatedDecl(
D->getLocation(), VarTemplate, TemplateArgs));
if (!InstVarTemplate)
return nullptr;
// Substitute the current template arguments.
const TemplateArgumentListInfo &TemplateArgsInfo = D->getTemplateArgsInfo();
VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
if (SemaRef.Subst(TemplateArgsInfo.getArgumentArray(),
TemplateArgsInfo.size(), VarTemplateArgsInfo, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(),
VarTemplateArgsInfo, false, Converted,
/*UpdateArgsWithConversion=*/true))
return nullptr;
// Check whether we've already seen a declaration of this specialization.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *PrevDecl =
InstVarTemplate->findSpecialization(Converted, InsertPos);
// Check whether we've already seen a conflicting instantiation of this
// declaration (for instance, if there was a prior implicit instantiation).
bool Ignored;
if (PrevDecl && SemaRef.CheckSpecializationInstantiationRedecl(
D->getLocation(), D->getSpecializationKind(), PrevDecl,
PrevDecl->getSpecializationKind(),
PrevDecl->getPointOfInstantiation(), Ignored))
return nullptr;
return VisitVarTemplateSpecializationDecl(
InstVarTemplate, D, VarTemplateArgsInfo, Converted, PrevDecl);
}
Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
VarTemplateDecl *VarTemplate, VarDecl *D,
const TemplateArgumentListInfo &TemplateArgsInfo,
ArrayRef<TemplateArgument> Converted,
VarTemplateSpecializationDecl *PrevDecl) {
// Do substitution on the type of the declaration
TypeSourceInfo *DI =
SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
D->getTypeSpecStartLoc(), D->getDeclName());
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
<< D->isStaticDataMember() << DI->getType();
return nullptr;
}
// Build the instantiated declaration
VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted);
Var->setTemplateArgsInfo(TemplateArgsInfo);
if (!PrevDecl) {
void *InsertPos = nullptr;
VarTemplate->findSpecialization(Converted, InsertPos);
VarTemplate->AddSpecialization(Var, InsertPos);
}
if (SemaRef.getLangOpts().OpenCL)
SemaRef.deduceOpenCLAddressSpace(Var);
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Var))
return nullptr;
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, false, PrevDecl);
return Var;
}
Decl *TemplateDeclInstantiator::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) {
llvm_unreachable("@defs is not supported in Objective-C++");
}
Decl *TemplateDeclInstantiator::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
// FIXME: We need to be able to instantiate FriendTemplateDecls.
unsigned DiagID = SemaRef.getDiagnostics().getCustomDiagID(
DiagnosticsEngine::Error,
"cannot instantiate %0 yet");
SemaRef.Diag(D->getLocation(), DiagID)
<< D->getDeclKindName();
return nullptr;
}
Decl *TemplateDeclInstantiator::VisitConceptDecl(ConceptDecl *D) {
llvm_unreachable("Concept definitions cannot reside inside a template");
}
Decl *
TemplateDeclInstantiator::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) {
return RequiresExprBodyDecl::Create(SemaRef.Context, D->getDeclContext(),
D->getBeginLoc());
}
Decl *TemplateDeclInstantiator::VisitDecl(Decl *D) {
llvm_unreachable("Unexpected decl");
}
Decl *Sema::SubstDecl(Decl *D, DeclContext *Owner,
const MultiLevelTemplateArgumentList &TemplateArgs) {
TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs);
if (D->isInvalidDecl())
return nullptr;
Decl *SubstD;
runWithSufficientStackSpace(D->getLocation(), [&] {
SubstD = Instantiator.Visit(D);
});
return SubstD;
}
void TemplateDeclInstantiator::adjustForRewrite(RewriteKind RK,
FunctionDecl *Orig, QualType &T,
TypeSourceInfo *&TInfo,
DeclarationNameInfo &NameInfo) {
assert(RK == RewriteKind::RewriteSpaceshipAsEqualEqual);
// C++2a [class.compare.default]p3:
// the return type is replaced with bool
auto *FPT = T->castAs<FunctionProtoType>();
T = SemaRef.Context.getFunctionType(
SemaRef.Context.BoolTy, FPT->getParamTypes(), FPT->getExtProtoInfo());
// Update the return type in the source info too. The most straightforward
// way is to create new TypeSourceInfo for the new type. Use the location of
// the '= default' as the location of the new type.
//
// FIXME: Set the correct return type when we initially transform the type,
// rather than delaying it to now.
TypeSourceInfo *NewTInfo =
SemaRef.Context.getTrivialTypeSourceInfo(T, Orig->getEndLoc());
auto OldLoc = TInfo->getTypeLoc().getAsAdjusted<FunctionProtoTypeLoc>();
assert(OldLoc && "type of function is not a function type?");
auto NewLoc = NewTInfo->getTypeLoc().castAs<FunctionProtoTypeLoc>();
for (unsigned I = 0, N = OldLoc.getNumParams(); I != N; ++I)
NewLoc.setParam(I, OldLoc.getParam(I));
TInfo = NewTInfo;
// and the declarator-id is replaced with operator==
NameInfo.setName(
SemaRef.Context.DeclarationNames.getCXXOperatorName(OO_EqualEqual));
}
FunctionDecl *Sema::SubstSpaceshipAsEqualEqual(CXXRecordDecl *RD,
FunctionDecl *Spaceship) {
if (Spaceship->isInvalidDecl())
return nullptr;
// C++2a [class.compare.default]p3:
// an == operator function is declared implicitly [...] with the same
// access and function-definition and in the same class scope as the
// three-way comparison operator function
MultiLevelTemplateArgumentList NoTemplateArgs;
NoTemplateArgs.setKind(TemplateSubstitutionKind::Rewrite);
NoTemplateArgs.addOuterRetainedLevels(RD->getTemplateDepth());
TemplateDeclInstantiator Instantiator(*this, RD, NoTemplateArgs);
Decl *R;
if (auto *MD = dyn_cast<CXXMethodDecl>(Spaceship)) {
R = Instantiator.VisitCXXMethodDecl(
MD, nullptr, None,
TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual);
} else {
assert(Spaceship->getFriendObjectKind() &&
"defaulted spaceship is neither a member nor a friend");
R = Instantiator.VisitFunctionDecl(
Spaceship, nullptr,
TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual);
if (!R)
return nullptr;
FriendDecl *FD =
FriendDecl::Create(Context, RD, Spaceship->getLocation(),
cast<NamedDecl>(R), Spaceship->getBeginLoc());
FD->setAccess(AS_public);
RD->addDecl(FD);
}
return cast_or_null<FunctionDecl>(R);
}
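// Illustrative example for SubstSpaceshipAsEqualEqual above (editorial
// sketch, not part of the original source): per C++2a [class.compare.default],
// a defaulted three-way comparison implicitly declares a matching '=='.
//
//   struct P {
//     int x, y;
//     auto operator<=>(const P &) const = default;
//   };
//
// The rewrite reuses the spaceship declaration as the pattern, replacing the
// return type with bool and the declarator-id with operator==, yielding
// 'bool operator==(const P &) const' with the same access.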
/// Instantiates a nested template parameter list in the current
/// instantiation context.
///
/// \param L The parameter list to instantiate
///
/// \returns NULL if there was an error
TemplateParameterList *
TemplateDeclInstantiator::SubstTemplateParams(TemplateParameterList *L) {
// Get errors for all the parameters before bailing out.
bool Invalid = false;
unsigned N = L->size();
typedef SmallVector<NamedDecl *, 8> ParamVector;
ParamVector Params;
Params.reserve(N);
for (auto &P : *L) {
NamedDecl *D = cast_or_null<NamedDecl>(Visit(P));
Params.push_back(D);
Invalid = Invalid || !D || D->isInvalidDecl();
}
// Clean up if we had an error.
if (Invalid)
return nullptr;
// FIXME: Concepts: Substitution into requires clause should only happen when
// checking satisfaction.
Expr *InstRequiresClause = nullptr;
if (Expr *E = L->getRequiresClause()) {
EnterExpressionEvaluationContext ConstantEvaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult Res = SemaRef.SubstExpr(E, TemplateArgs);
if (Res.isInvalid() || !Res.isUsable()) {
return nullptr;
}
InstRequiresClause = Res.get();
}
TemplateParameterList *InstL
= TemplateParameterList::Create(SemaRef.Context, L->getTemplateLoc(),
L->getLAngleLoc(), Params,
L->getRAngleLoc(), InstRequiresClause);
return InstL;
}
TemplateParameterList *
Sema::SubstTemplateParams(TemplateParameterList *Params, DeclContext *Owner,
const MultiLevelTemplateArgumentList &TemplateArgs) {
TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs);
return Instantiator.SubstTemplateParams(Params);
}
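// Illustrative example for SubstTemplateParams above (editorial sketch, not
// part of the original source; names are invented): the nested parameter list
// of a member template, including any requires-clause, is rebuilt when the
// enclosing template is instantiated, e.g.
//
//   template <typename T> struct Outer {
//     template <typename U> requires (sizeof(U) >= sizeof(T))
//     void f(U);
//   };
//
// Instantiating Outer<int> produces an inner list equivalent to
// 'template <typename U> requires (sizeof(U) >= sizeof(int))'.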
/// Instantiate the declaration of a class template partial
/// specialization.
///
/// \param ClassTemplate the (instantiated) class template that is partially
/// specialized by the instantiation of \p PartialSpec.
///
/// \param PartialSpec the (uninstantiated) class template partial
/// specialization that we are instantiating.
///
/// \returns The instantiated partial specialization, if successful; otherwise,
/// NULL to indicate an error.
ClassTemplatePartialSpecializationDecl *
TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
ClassTemplateDecl *ClassTemplate,
ClassTemplatePartialSpecializationDecl *PartialSpec) {
// Create a local instantiation scope for this class template partial
// specialization, which will contain the instantiations of the template
// parameters.
LocalInstantiationScope Scope(SemaRef);
// Substitute into the template parameters of the class template partial
// specialization.
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
// Substitute into the template arguments of the class template partial
// specialization.
const ASTTemplateArgumentListInfo *TemplArgInfo
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// class template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(ClassTemplate,
PartialSpec->getLocation(),
InstTemplateArgs,
false,
Converted))
return nullptr;
// Check these arguments are valid for a template partial specialization.
if (SemaRef.CheckTemplatePartialSpecializationArgs(
PartialSpec->getLocation(), ClassTemplate, InstTemplateArgs.size(),
Converted))
return nullptr;
// Figure out where to insert this class template partial specialization
// in the member template's set of class template partial specializations.
void *InsertPos = nullptr;
ClassTemplateSpecializationDecl *PrevDecl
= ClassTemplate->findPartialSpecialization(Converted, InstParams,
InsertPos);
// Build the canonical type that describes the converted template
// arguments of the class template partial specialization.
QualType CanonType
= SemaRef.Context.getTemplateSpecializationType(TemplateName(ClassTemplate),
Converted);
// Build the fully-sugared type for this class template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy
= SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(ClassTemplate),
PartialSpec->getLocation(),
InstTemplateArgs,
CanonType);
if (PrevDecl) {
// We've already seen a partial specialization with the same template
// parameters and template arguments. This can happen, for example, when
// substituting the outer template arguments ends up causing two
// class template partial specializations of a member class template
// to have identical forms, e.g.,
//
// template<typename T, typename U>
// struct Outer {
// template<typename X, typename Y> struct Inner;
// template<typename Y> struct Inner<T, Y>;
// template<typename Y> struct Inner<U, Y>;
// };
//
// Outer<int, int> outer; // error: the partial specializations of Inner
// // have the same signature.
SemaRef.Diag(PartialSpec->getLocation(), diag::err_partial_spec_redeclared)
<< WrittenTy->getType();
SemaRef.Diag(PrevDecl->getLocation(), diag::note_prev_partial_spec_here)
<< SemaRef.Context.getTypeDeclType(PrevDecl);
return nullptr;
}
// Create the class template partial specialization declaration.
ClassTemplatePartialSpecializationDecl *InstPartialSpec =
ClassTemplatePartialSpecializationDecl::Create(
SemaRef.Context, PartialSpec->getTagKind(), Owner,
PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams,
ClassTemplate, Converted, InstTemplateArgs, CanonType, nullptr);
// Substitute the nested name specifier, if any.
if (SubstQualifier(PartialSpec, InstPartialSpec))
return nullptr;
InstPartialSpec->setInstantiatedFromMember(PartialSpec);
InstPartialSpec->setTypeAsWritten(WrittenTy);
// Check the completed partial specialization.
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
// Add this partial specialization to the set of class template partial
// specializations.
ClassTemplate->AddPartialSpecialization(InstPartialSpec,
/*InsertPos=*/nullptr);
return InstPartialSpec;
}
/// Instantiate the declaration of a variable template partial
/// specialization.
///
/// \param VarTemplate the (instantiated) variable template that is partially
/// specialized by the instantiation of \p PartialSpec.
///
/// \param PartialSpec the (uninstantiated) variable template partial
/// specialization that we are instantiating.
///
/// \returns The instantiated partial specialization, if successful; otherwise,
/// NULL to indicate an error.
VarTemplatePartialSpecializationDecl *
TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
VarTemplateDecl *VarTemplate,
VarTemplatePartialSpecializationDecl *PartialSpec) {
// Create a local instantiation scope for this variable template partial
// specialization, which will contain the instantiations of the template
// parameters.
LocalInstantiationScope Scope(SemaRef);
// Substitute into the template parameters of the variable template partial
// specialization.
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters();
TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
if (!InstParams)
return nullptr;
// Substitute into the template arguments of the variable template partial
// specialization.
const ASTTemplateArgumentListInfo *TemplArgInfo
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
TemplArgInfo->NumTemplateArgs,
InstTemplateArgs, TemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
// variable template.
SmallVector<TemplateArgument, 4> Converted;
if (SemaRef.CheckTemplateArgumentList(VarTemplate, PartialSpec->getLocation(),
InstTemplateArgs, false, Converted))
return nullptr;
// Check these arguments are valid for a template partial specialization.
if (SemaRef.CheckTemplatePartialSpecializationArgs(
PartialSpec->getLocation(), VarTemplate, InstTemplateArgs.size(),
Converted))
return nullptr;
// Figure out where to insert this variable template partial specialization
// in the member template's set of variable template partial specializations.
void *InsertPos = nullptr;
VarTemplateSpecializationDecl *PrevDecl =
VarTemplate->findPartialSpecialization(Converted, InstParams, InsertPos);
// Build the canonical type that describes the converted template
// arguments of the variable template partial specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
TemplateName(VarTemplate), Converted);
// Build the fully-sugared type for this variable template
// specialization as the user wrote in the specialization
// itself. This means that we'll pretty-print the type retrieved
// from the specialization's declaration the way that the user
// actually wrote the specialization, rather than formatting the
// name based on the "canonical" representation used to store the
// template arguments in the specialization.
TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
TemplateName(VarTemplate), PartialSpec->getLocation(), InstTemplateArgs,
CanonType);
if (PrevDecl) {
// We've already seen a partial specialization with the same template
// parameters and template arguments. This can happen, for example, when
// substituting the outer template arguments ends up causing two
// variable template partial specializations of a member variable template
// to have identical forms, e.g.,
//
// template<typename T, typename U>
// struct Outer {
// template<typename X, typename Y> pair<X,Y> p;
// template<typename Y> pair<T, Y> p;
// template<typename Y> pair<U, Y> p;
// };
//
// Outer<int, int> outer; // error: the partial specializations of p
// // have the same signature.
SemaRef.Diag(PartialSpec->getLocation(),
diag::err_var_partial_spec_redeclared)
<< WrittenTy->getType();
SemaRef.Diag(PrevDecl->getLocation(),
diag::note_var_prev_partial_spec_here);
return nullptr;
}
// Do substitution on the type of the declaration
TypeSourceInfo *DI = SemaRef.SubstType(
PartialSpec->getTypeSourceInfo(), TemplateArgs,
PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName());
if (!DI)
return nullptr;
if (DI->getType()->isFunctionType()) {
SemaRef.Diag(PartialSpec->getLocation(),
diag::err_variable_instantiates_to_function)
<< PartialSpec->isStaticDataMember() << DI->getType();
return nullptr;
}
// Create the variable template partial specialization declaration.
VarTemplatePartialSpecializationDecl *InstPartialSpec =
VarTemplatePartialSpecializationDecl::Create(
SemaRef.Context, Owner, PartialSpec->getInnerLocStart(),
PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(),
DI, PartialSpec->getStorageClass(), Converted, InstTemplateArgs);
// Substitute the nested name specifier, if any.
if (SubstQualifier(PartialSpec, InstPartialSpec))
return nullptr;
InstPartialSpec->setInstantiatedFromMember(PartialSpec);
InstPartialSpec->setTypeAsWritten(WrittenTy);
// Check the completed partial specialization.
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec);
// Add this partial specialization to the set of variable template partial
// specializations. The instantiation of the initializer is not necessary.
VarTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr);
SemaRef.BuildVariableInstantiation(InstPartialSpec, PartialSpec, TemplateArgs,
LateAttrs, Owner, StartingScope);
return InstPartialSpec;
}
TypeSourceInfo*
TemplateDeclInstantiator::SubstFunctionType(FunctionDecl *D,
SmallVectorImpl<ParmVarDecl *> &Params) {
TypeSourceInfo *OldTInfo = D->getTypeSourceInfo();
assert(OldTInfo && "substituting function without type source info");
assert(Params.empty() && "parameter vector is non-empty at start");
CXXRecordDecl *ThisContext = nullptr;
Qualifiers ThisTypeQuals;
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
ThisContext = cast<CXXRecordDecl>(Owner);
ThisTypeQuals = Method->getMethodQualifiers();
}
TypeSourceInfo *NewTInfo
= SemaRef.SubstFunctionDeclType(OldTInfo, TemplateArgs,
D->getTypeSpecStartLoc(),
D->getDeclName(),
ThisContext, ThisTypeQuals);
if (!NewTInfo)
return nullptr;
TypeLoc OldTL = OldTInfo->getTypeLoc().IgnoreParens();
if (FunctionProtoTypeLoc OldProtoLoc = OldTL.getAs<FunctionProtoTypeLoc>()) {
if (NewTInfo != OldTInfo) {
// Get parameters from the new type info.
TypeLoc NewTL = NewTInfo->getTypeLoc().IgnoreParens();
FunctionProtoTypeLoc NewProtoLoc = NewTL.castAs<FunctionProtoTypeLoc>();
unsigned NewIdx = 0;
for (unsigned OldIdx = 0, NumOldParams = OldProtoLoc.getNumParams();
OldIdx != NumOldParams; ++OldIdx) {
ParmVarDecl *OldParam = OldProtoLoc.getParam(OldIdx);
if (!OldParam)
return nullptr;
LocalInstantiationScope *Scope = SemaRef.CurrentInstantiationScope;
Optional<unsigned> NumArgumentsInExpansion;
if (OldParam->isParameterPack())
NumArgumentsInExpansion =
SemaRef.getNumArgumentsInExpansion(OldParam->getType(),
TemplateArgs);
if (!NumArgumentsInExpansion) {
// Simple case: normal parameter, or a parameter pack that's
// instantiated to a (still-dependent) parameter pack.
ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++);
Params.push_back(NewParam);
Scope->InstantiatedLocal(OldParam, NewParam);
} else {
// Parameter pack expansion: make the instantiation an argument pack.
Scope->MakeInstantiatedLocalArgPack(OldParam);
for (unsigned I = 0; I != *NumArgumentsInExpansion; ++I) {
ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++);
Params.push_back(NewParam);
Scope->InstantiatedLocalPackArg(OldParam, NewParam);
}
}
}
} else {
// The function type itself was not dependent and therefore no
// substitution occurred. However, we still need to instantiate
// the function parameters themselves.
const FunctionProtoType *OldProto =
cast<FunctionProtoType>(OldProtoLoc.getType());
for (unsigned i = 0, i_end = OldProtoLoc.getNumParams(); i != i_end;
++i) {
ParmVarDecl *OldParam = OldProtoLoc.getParam(i);
if (!OldParam) {
Params.push_back(SemaRef.BuildParmVarDeclForTypedef(
D, D->getLocation(), OldProto->getParamType(i)));
continue;
}
ParmVarDecl *Parm =
cast_or_null<ParmVarDecl>(VisitParmVarDecl(OldParam));
if (!Parm)
return nullptr;
Params.push_back(Parm);
}
}
} else {
// If the type of this function, after ignoring parentheses, is not
// *directly* a function type, then we're instantiating a function that
// was declared via a typedef or with attributes, e.g.,
//
// typedef int functype(int, int);
// functype func;
// int __cdecl meth(int, int);
//
// In this case, we'll just go instantiate the ParmVarDecls that we
// synthesized in the method declaration.
SmallVector<QualType, 4> ParamTypes;
Sema::ExtParameterInfoBuilder ExtParamInfos;
if (SemaRef.SubstParmTypes(D->getLocation(), D->parameters(), nullptr,
TemplateArgs, ParamTypes, &Params,
ExtParamInfos))
return nullptr;
}
return NewTInfo;
}
/// Introduce the instantiated function parameters into the local
/// instantiation scope, and set the parameter names to those used
/// in the template.
static bool addInstantiatedParametersToScope(Sema &S, FunctionDecl *Function,
const FunctionDecl *PatternDecl,
LocalInstantiationScope &Scope,
const MultiLevelTemplateArgumentList &TemplateArgs) {
unsigned FParamIdx = 0;
for (unsigned I = 0, N = PatternDecl->getNumParams(); I != N; ++I) {
const ParmVarDecl *PatternParam = PatternDecl->getParamDecl(I);
if (!PatternParam->isParameterPack()) {
// Simple case: not a parameter pack.
assert(FParamIdx < Function->getNumParams());
ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx);
FunctionParam->setDeclName(PatternParam->getDeclName());
// If the parameter's type is not dependent, update it to match the type
// in the pattern. They can differ in top-level cv-qualifiers, and we want
// the pattern's type here. If the type is dependent, they can't differ,
// per core issue 1668. Substitute into the type from the pattern, in case
// it's instantiation-dependent.
// FIXME: Updating the type to work around this is at best fragile.
if (!PatternDecl->getType()->isDependentType()) {
QualType T = S.SubstType(PatternParam->getType(), TemplateArgs,
FunctionParam->getLocation(),
FunctionParam->getDeclName());
if (T.isNull())
return true;
FunctionParam->setType(T);
}
Scope.InstantiatedLocal(PatternParam, FunctionParam);
++FParamIdx;
continue;
}
// Expand the parameter pack.
Scope.MakeInstantiatedLocalArgPack(PatternParam);
Optional<unsigned> NumArgumentsInExpansion
= S.getNumArgumentsInExpansion(PatternParam->getType(), TemplateArgs);
if (NumArgumentsInExpansion) {
QualType PatternType =
PatternParam->getType()->castAs<PackExpansionType>()->getPattern();
for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) {
ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx);
FunctionParam->setDeclName(PatternParam->getDeclName());
if (!PatternDecl->getType()->isDependentType()) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, Arg);
QualType T = S.SubstType(PatternType, TemplateArgs,
FunctionParam->getLocation(),
FunctionParam->getDeclName());
if (T.isNull())
return true;
FunctionParam->setType(T);
}
Scope.InstantiatedLocalPackArg(PatternParam, FunctionParam);
++FParamIdx;
}
}
}
return false;
}
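// Illustrative example for addInstantiatedParametersToScope above (editorial
// sketch, not part of the original source; names are invented): a pattern
// parameter pack expands to several instantiated parameters, e.g.
//
//   template <typename... Ts> void f(Ts... vs);
//
// For f<int, char>, the single pattern parameter 'vs' becomes an argument
// pack covering two instantiated ParmVarDecls, each registered through
// InstantiatedLocalPackArg, while a non-pack parameter maps one-to-one via
// InstantiatedLocal.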
bool Sema::InstantiateDefaultArgument(SourceLocation CallLoc, FunctionDecl *FD,
ParmVarDecl *Param) {
assert(Param->hasUninstantiatedDefaultArg());
Expr *UninstExpr = Param->getUninstantiatedDefaultArg();
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Param);
// Instantiate the expression.
//
// FIXME: Pass in a correct Pattern argument, otherwise
// getTemplateInstantiationArgs uses the lexical context of FD, e.g.
//
// template<typename T>
// struct A {
// static int FooImpl();
//
// template<typename Tp>
// // bug: default argument A<T>::FooImpl() is evaluated with 2-level
// // template argument list [[T], [Tp]], should be [[Tp]].
// friend A<Tp> Foo(int a);
// };
//
// template<typename T>
// A<T> Foo(int a = A<T>::FooImpl());
MultiLevelTemplateArgumentList TemplateArgs
= getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true);
InstantiatingTemplate Inst(*this, CallLoc, Param,
TemplateArgs.getInnermost());
if (Inst.isInvalid())
return true;
if (Inst.isAlreadyInstantiating()) {
Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD;
Param->setInvalidDecl();
return true;
}
ExprResult Result;
{
// C++ [dcl.fct.default]p5:
// The names in the [default argument] expression are bound, and
// the semantic constraints are checked, at the point where the
// default argument expression appears.
ContextRAII SavedContext(*this, FD);
LocalInstantiationScope Local(*this);
FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(
/*ForDefinition*/ false);
if (addInstantiatedParametersToScope(*this, FD, Pattern, Local,
TemplateArgs))
return true;
runWithSufficientStackSpace(CallLoc, [&] {
Result = SubstInitializer(UninstExpr, TemplateArgs,
/*DirectInit*/false);
});
}
if (Result.isInvalid())
return true;
// Check the expression as an initializer for the parameter.
InitializedEntity Entity
= InitializedEntity::InitializeParameter(Context, Param);
InitializationKind Kind = InitializationKind::CreateCopy(
Param->getLocation(),
/*FIXME:EqualLoc*/ UninstExpr->getBeginLoc());
Expr *ResultE = Result.getAs<Expr>();
InitializationSequence InitSeq(*this, Entity, Kind, ResultE);
Result = InitSeq.Perform(*this, Entity, Kind, ResultE);
if (Result.isInvalid())
return true;
Result =
ActOnFinishFullExpr(Result.getAs<Expr>(), Param->getOuterLocStart(),
/*DiscardedValue*/ false);
if (Result.isInvalid())
return true;
// Remember the instantiated default argument.
Param->setDefaultArg(Result.getAs<Expr>());
if (ASTMutationListener *L = getASTMutationListener())
L->DefaultArgumentInstantiated(Param);
return false;
}
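// Illustrative example for InstantiateDefaultArgument above (editorial
// sketch, not part of the original source; names are invented): default
// arguments of a templated function are instantiated lazily, at the call that
// actually uses them, e.g.
//
//   template <typename T> T make(T seed = T(42));
//   int  i = make<int>();    // default argument 'int(42)' instantiated here
//   long j = make<long>(7);  // default argument never instantiated
//
// The substituted expression is then checked as an initializer for the
// parameter and cached with setDefaultArg.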
void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation,
FunctionDecl *Decl) {
const FunctionProtoType *Proto = Decl->getType()->castAs<FunctionProtoType>();
if (Proto->getExceptionSpecType() != EST_Uninstantiated)
return;
InstantiatingTemplate Inst(*this, PointOfInstantiation, Decl,
InstantiatingTemplate::ExceptionSpecification());
if (Inst.isInvalid()) {
// We hit the instantiation depth limit. Clear the exception specification
// so that our callers don't have to cope with EST_Uninstantiated.
UpdateExceptionSpec(Decl, EST_None);
return;
}
if (Inst.isAlreadyInstantiating()) {
// This exception specification indirectly depends on itself. Reject.
// FIXME: Corresponding rule in the standard?
Diag(PointOfInstantiation, diag::err_exception_spec_cycle) << Decl;
UpdateExceptionSpec(Decl, EST_None);
return;
}
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Decl);
LocalInstantiationScope Scope(*this);
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true);
// FIXME: We can't use getTemplateInstantiationPattern(false) in general
// here, because for a non-defining friend declaration in a class template,
// we don't store enough information to map back to the friend declaration in
// the template.
FunctionDecl *Template = Proto->getExceptionSpecTemplate();
if (addInstantiatedParametersToScope(*this, Decl, Template, Scope,
TemplateArgs)) {
UpdateExceptionSpec(Decl, EST_None);
return;
}
SubstExceptionSpec(Decl, Template->getType()->castAs<FunctionProtoType>(),
TemplateArgs);
}
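// Illustrative example for InstantiateExceptionSpec above (editorial sketch,
// not part of the original source; names are invented): per DR1330, a
// non-trivial exception specification is deferred and only instantiated when
// it is needed, e.g.
//
//   template <typename T> void f(T) noexcept(noexcept(T()));
//
// f<Widget> is created with EST_Uninstantiated; calling it, taking its
// address, or otherwise needing the exception specification triggers the
// substitution of 'noexcept(T())' at that point.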
bool Sema::CheckInstantiatedFunctionTemplateConstraints(
SourceLocation PointOfInstantiation, FunctionDecl *Decl,
ArrayRef<TemplateArgument> TemplateArgs,
ConstraintSatisfaction &Satisfaction) {
// In most cases we're not going to have constraints, so check for that first.
FunctionTemplateDecl *Template = Decl->getPrimaryTemplate();
// Note - code synthesis context for the constraints check is created
// inside CheckConstraintsSatisfaction.
SmallVector<const Expr *, 3> TemplateAC;
Template->getAssociatedConstraints(TemplateAC);
if (TemplateAC.empty()) {
Satisfaction.IsSatisfied = true;
return false;
}
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Decl);
LocalInstantiationScope Scope(*this);
// If this is not an explicit specialization - we need to get the instantiated
// version of the template arguments and add them to scope for the
// substitution.
if (Decl->isTemplateInstantiation()) {
InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(),
InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(),
TemplateArgs, SourceRange());
if (Inst.isInvalid())
return true;
MultiLevelTemplateArgumentList MLTAL(
*Decl->getTemplateSpecializationArgs());
if (addInstantiatedParametersToScope(
*this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(),
Scope, MLTAL))
return true;
}
Qualifiers ThisQuals;
CXXRecordDecl *Record = nullptr;
if (auto *Method = dyn_cast<CXXMethodDecl>(Decl)) {
ThisQuals = Method->getMethodQualifiers();
Record = Method->getParent();
}
CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs,
PointOfInstantiation, Satisfaction);
}
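// Illustrative example for CheckInstantiatedFunctionTemplateConstraints above
// (editorial sketch, not part of the original source; names are invented):
//
//   template <typename T> requires std::integral<T>
//   void g(T);
//
// For g<int> the associated constraint std::integral<int> is checked with the
// instantiated parameters in scope and is satisfied; for g<double> the
// constraint is not satisfied and Satisfaction records why.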
/// Initializes the common fields of an instantiated function
/// declaration (New) from the corresponding fields of its template (Tmpl).
///
/// \returns true if there was an error
bool
TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New,
FunctionDecl *Tmpl) {
New->setImplicit(Tmpl->isImplicit());
// Forward the mangling number from the template to the instantiated decl.
SemaRef.Context.setManglingNumber(New,
SemaRef.Context.getManglingNumber(Tmpl));
// If we are substituting explicitly-specified template arguments
// or deduced template arguments into a function template and we reach this
// point, we are now past the point where SFINAE applies and have committed
// to keeping the new function template specialization. We therefore
// convert the active template instantiation for the function template
// into a template instantiation for this specific function template
// specialization, which is not a SFINAE context, so that we diagnose any
// further errors in the declaration itself.
//
// FIXME: This is a hack.
typedef Sema::CodeSynthesisContext ActiveInstType;
ActiveInstType &ActiveInst = SemaRef.CodeSynthesisContexts.back();
if (ActiveInst.Kind == ActiveInstType::ExplicitTemplateArgumentSubstitution ||
ActiveInst.Kind == ActiveInstType::DeducedTemplateArgumentSubstitution) {
if (FunctionTemplateDecl *FunTmpl
= dyn_cast<FunctionTemplateDecl>(ActiveInst.Entity)) {
assert(FunTmpl->getTemplatedDecl() == Tmpl &&
"Deduction from the wrong function template?");
(void) FunTmpl;
SemaRef.InstantiatingSpecializations.erase(
{ActiveInst.Entity->getCanonicalDecl(), ActiveInst.Kind});
atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
ActiveInst.Kind = ActiveInstType::TemplateInstantiation;
ActiveInst.Entity = New;
atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst);
}
}
const FunctionProtoType *Proto = Tmpl->getType()->getAs<FunctionProtoType>();
assert(Proto && "Function template without prototype?");
if (Proto->hasExceptionSpec() || Proto->getNoReturnAttr()) {
FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
// DR1330: In C++11, defer instantiation of a non-trivial
// exception specification.
// DR1484: Local classes and their members are instantiated along with the
// containing function.
if (SemaRef.getLangOpts().CPlusPlus11 &&
EPI.ExceptionSpec.Type != EST_None &&
EPI.ExceptionSpec.Type != EST_DynamicNone &&
EPI.ExceptionSpec.Type != EST_BasicNoexcept &&
!Tmpl->isInLocalScopeForInstantiation()) {
FunctionDecl *ExceptionSpecTemplate = Tmpl;
if (EPI.ExceptionSpec.Type == EST_Uninstantiated)
ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate;
ExceptionSpecificationType NewEST = EST_Uninstantiated;
if (EPI.ExceptionSpec.Type == EST_Unevaluated)
NewEST = EST_Unevaluated;
// Mark the function as having an uninstantiated exception specification.
const FunctionProtoType *NewProto
= New->getType()->getAs<FunctionProtoType>();
assert(NewProto && "Template instantiation without function prototype?");
EPI = NewProto->getExtProtoInfo();
EPI.ExceptionSpec.Type = NewEST;
EPI.ExceptionSpec.SourceDecl = New;
EPI.ExceptionSpec.SourceTemplate = ExceptionSpecTemplate;
New->setType(SemaRef.Context.getFunctionType(
NewProto->getReturnType(), NewProto->getParamTypes(), EPI));
} else {
Sema::ContextRAII SwitchContext(SemaRef, New);
SemaRef.SubstExceptionSpec(New, Proto, TemplateArgs);
}
}
// Get the definition. Leaves the variable unchanged if undefined.
const FunctionDecl *Definition = Tmpl;
Tmpl->isDefined(Definition);
SemaRef.InstantiateAttrs(TemplateArgs, Definition, New,
LateAttrs, StartingScope);
return false;
}
/// Initializes common fields of an instantiated method
/// declaration (New) from the corresponding fields of its template
/// (Tmpl).
///
/// \returns true if there was an error
bool
TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New,
CXXMethodDecl *Tmpl) {
if (InitFunctionInstantiation(New, Tmpl))
return true;
if (isa<CXXDestructorDecl>(New) && SemaRef.getLangOpts().CPlusPlus11)
SemaRef.AdjustDestructorExceptionSpec(cast<CXXDestructorDecl>(New));
New->setAccess(Tmpl->getAccess());
if (Tmpl->isVirtualAsWritten())
New->setVirtualAsWritten(true);
// FIXME: New needs a pointer to Tmpl
return false;
}
bool TemplateDeclInstantiator::SubstDefaultedFunction(FunctionDecl *New,
FunctionDecl *Tmpl) {
// Transfer across any unqualified lookups.
if (auto *DFI = Tmpl->getDefaultedFunctionInfo()) {
SmallVector<DeclAccessPair, 32> Lookups;
Lookups.reserve(DFI->getUnqualifiedLookups().size());
bool AnyChanged = false;
for (DeclAccessPair DA : DFI->getUnqualifiedLookups()) {
NamedDecl *D = SemaRef.FindInstantiatedDecl(New->getLocation(),
DA.getDecl(), TemplateArgs);
if (!D)
return true;
AnyChanged |= (D != DA.getDecl());
Lookups.push_back(DeclAccessPair::make(D, DA.getAccess()));
}
// It's unlikely that substitution will change any declarations. Don't
// store an unnecessary copy in that case.
New->setDefaultedFunctionInfo(
AnyChanged ? FunctionDecl::DefaultedFunctionInfo::Create(
SemaRef.Context, Lookups)
: DFI);
}
SemaRef.SetDeclDefaulted(New, Tmpl->getLocation());
return false;
}
/// Instantiate (or find existing instantiation of) a function template with a
/// given set of template arguments.
///
/// Usually this should not be used, and template argument deduction should be
/// used in its place.
FunctionDecl *
Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD,
const TemplateArgumentList *Args,
SourceLocation Loc) {
FunctionDecl *FD = FTD->getTemplatedDecl();
sema::TemplateDeductionInfo Info(Loc);
InstantiatingTemplate Inst(
*this, Loc, FTD, Args->asArray(),
CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info);
if (Inst.isInvalid())
return nullptr;
ContextRAII SavedContext(*this, FD);
MultiLevelTemplateArgumentList MArgs(*Args);
return cast_or_null<FunctionDecl>(SubstDecl(FD, FD->getParent(), MArgs));
}
/// Instantiate the definition of the given function from its
/// template.
///
/// \param PointOfInstantiation the point at which the instantiation was
/// required. Note that this is not precisely a "point of instantiation"
/// for the function, but it's close.
///
/// \param Function the already-instantiated declaration of a
/// function template specialization or member function of a class template
/// specialization.
///
/// \param Recursive if true, recursively instantiates any functions that
/// are required by this instantiation.
///
/// \param DefinitionRequired if true, then we are performing an explicit
/// instantiation where the body of the function is required. Complain if
/// there is no such body.
void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
FunctionDecl *Function,
bool Recursive,
bool DefinitionRequired,
bool AtEndOfTU) {
if (Function->isInvalidDecl() || isa<CXXDeductionGuideDecl>(Function))
return;
// Never instantiate an explicit specialization except if it is a class scope
// explicit specialization.
TemplateSpecializationKind TSK =
Function->getTemplateSpecializationKindForInstantiation();
if (TSK == TSK_ExplicitSpecialization)
return;
// Don't instantiate a definition if we already have one.
const FunctionDecl *ExistingDefn = nullptr;
if (Function->isDefined(ExistingDefn,
/*CheckForPendingFriendDefinition=*/true)) {
if (ExistingDefn->isThisDeclarationADefinition())
return;
// If we're asked to instantiate a function whose body comes from an
// instantiated friend declaration, attach the instantiated body to the
// corresponding declaration of the function.
assert(ExistingDefn->isThisDeclarationInstantiatedFromAFriendDefinition());
Function = const_cast<FunctionDecl*>(ExistingDefn);
}
// Find the function body that we'll be substituting.
const FunctionDecl *PatternDecl = Function->getTemplateInstantiationPattern();
assert(PatternDecl && "instantiating a non-template");
const FunctionDecl *PatternDef = PatternDecl->getDefinition();
Stmt *Pattern = nullptr;
if (PatternDef) {
Pattern = PatternDef->getBody(PatternDef);
PatternDecl = PatternDef;
if (PatternDef->willHaveBody())
PatternDef = nullptr;
}
// FIXME: We need to track the instantiation stack in order to know which
// definitions should be visible within this instantiation.
if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Function,
Function->getInstantiatedFromMemberFunction(),
PatternDecl, PatternDef, TSK,
/*Complain*/DefinitionRequired)) {
if (DefinitionRequired)
Function->setInvalidDecl();
else if (TSK == TSK_ExplicitInstantiationDefinition) {
// Try again at the end of the translation unit (at which point a
// definition will be required).
assert(!Recursive);
Function->setInstantiationIsPending(true);
PendingInstantiations.push_back(
std::make_pair(Function, PointOfInstantiation));
} else if (TSK == TSK_ImplicitInstantiation) {
if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
!getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) {
Diag(PointOfInstantiation, diag::warn_func_template_missing)
<< Function;
Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
if (getLangOpts().CPlusPlus11)
Diag(PointOfInstantiation, diag::note_inst_declaration_hint)
<< Function;
}
}
return;
}
// Postpone late parsed template instantiations.
if (PatternDecl->isLateTemplateParsed() &&
!LateTemplateParser) {
Function->setInstantiationIsPending(true);
LateParsedInstantiations.push_back(
std::make_pair(Function, PointOfInstantiation));
return;
}
llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() {
std::string Name;
llvm::raw_string_ostream OS(Name);
Function->getNameForDiagnostic(OS, getPrintingPolicy(),
/*Qualified=*/true);
return Name;
});
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate later,
// while we're still within our own instantiation context.
// This has to happen before LateTemplateParser below is called, so that
// it marks vtables used in late parsed templates as used.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
LocalEagerInstantiationScope LocalInstantiations(*this);
// Call the LateTemplateParser callback if there is a need to late parse
// a templated function definition.
if (!Pattern && PatternDecl->isLateTemplateParsed() &&
LateTemplateParser) {
// FIXME: Optimize to allow individual templates to be deserialized.
if (PatternDecl->isFromASTFile())
ExternalSource->ReadLateParsedTemplates(LateParsedTemplateMap);
auto LPTIter = LateParsedTemplateMap.find(PatternDecl);
assert(LPTIter != LateParsedTemplateMap.end() &&
"missing LateParsedTemplate");
LateTemplateParser(OpaqueParser, *LPTIter->second);
Pattern = PatternDecl->getBody(PatternDecl);
}
// Note, we should never try to instantiate a deleted function template.
assert((Pattern || PatternDecl->isDefaulted() ||
PatternDecl->hasSkippedBody()) &&
"unexpected kind of function template definition");
// C++1y [temp.explicit]p10:
// Except for inline functions, declarations with types deduced from their
// initializer or return value, and class template specializations, other
// explicit instantiation declarations have the effect of suppressing the
// implicit instantiation of the entity to which they refer.
if (TSK == TSK_ExplicitInstantiationDeclaration &&
!PatternDecl->isInlined() &&
!PatternDecl->getReturnType()->getContainedAutoType())
return;
if (PatternDecl->isInlined()) {
// Function, and all later redeclarations of it (from imported modules,
// for instance), are now implicitly inline.
for (auto *D = Function->getMostRecentDecl(); /**/;
D = D->getPreviousDecl()) {
D->setImplicitlyInline();
if (D == Function)
break;
}
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Function);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Function, SourceLocation(),
"instantiating function definition");
// The instantiation is visible here, even if it was first declared in an
// unimported module.
Function->setVisibleDespiteOwningModule();
// Copy the inner loc start from the pattern.
Function->setInnerLocStart(PatternDecl->getInnerLocStart());
EnterExpressionEvaluationContext EvalContext(
*this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
// Introduce a new scope where local variable instantiations will be
// recorded, unless we're actually a member function within a local
// class, in which case we need to merge our results with the parent
// scope (of the enclosing function). The exception is instantiating
// a function template specialization, since the template to be
// instantiated already has references to locals properly substituted.
bool MergeWithParentScope = false;
if (CXXRecordDecl *Rec = dyn_cast<CXXRecordDecl>(Function->getDeclContext()))
MergeWithParentScope =
Rec->isLocalClass() && !Function->isFunctionTemplateSpecialization();
LocalInstantiationScope Scope(*this, MergeWithParentScope);
auto RebuildTypeSourceInfoForDefaultSpecialMembers = [&]() {
// Special members might get their TypeSourceInfo set up w.r.t. the
// PatternDecl context, in which case parameters could still be pointing
// back to the original class; make sure arguments are bound to the
// instantiated record instead.
assert(PatternDecl->isDefaulted() &&
"Special member needs to be defaulted");
auto PatternSM = getDefaultedFunctionKind(PatternDecl).asSpecialMember();
if (!(PatternSM == Sema::CXXCopyConstructor ||
PatternSM == Sema::CXXCopyAssignment ||
PatternSM == Sema::CXXMoveConstructor ||
PatternSM == Sema::CXXMoveAssignment))
return;
auto *NewRec = dyn_cast<CXXRecordDecl>(Function->getDeclContext());
const auto *PatternRec =
dyn_cast<CXXRecordDecl>(PatternDecl->getDeclContext());
if (!NewRec || !PatternRec)
return;
if (!PatternRec->isLambda())
return;
struct SpecialMemberTypeInfoRebuilder
: TreeTransform<SpecialMemberTypeInfoRebuilder> {
using Base = TreeTransform<SpecialMemberTypeInfoRebuilder>;
const CXXRecordDecl *OldDecl;
CXXRecordDecl *NewDecl;
SpecialMemberTypeInfoRebuilder(Sema &SemaRef, const CXXRecordDecl *O,
CXXRecordDecl *N)
: TreeTransform(SemaRef), OldDecl(O), NewDecl(N) {}
bool TransformExceptionSpec(SourceLocation Loc,
FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions,
bool &Changed) {
return false;
}
QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) {
const RecordType *T = TL.getTypePtr();
RecordDecl *Record = cast_or_null<RecordDecl>(
getDerived().TransformDecl(TL.getNameLoc(), T->getDecl()));
if (Record != OldDecl)
return Base::TransformRecordType(TLB, TL);
QualType Result = getDerived().RebuildRecordType(NewDecl);
if (Result.isNull())
return QualType();
RecordTypeLoc NewTL = TLB.push<RecordTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
} IR{*this, PatternRec, NewRec};
TypeSourceInfo *NewSI = IR.TransformType(Function->getTypeSourceInfo());
Function->setType(NewSI->getType());
Function->setTypeSourceInfo(NewSI);
ParmVarDecl *Parm = Function->getParamDecl(0);
TypeSourceInfo *NewParmSI = IR.TransformType(Parm->getTypeSourceInfo());
Parm->setType(NewParmSI->getType());
Parm->setTypeSourceInfo(NewParmSI);
};
if (PatternDecl->isDefaulted()) {
RebuildTypeSourceInfoForDefaultSpecialMembers();
SetDeclDefaulted(Function, PatternDecl->getLocation());
} else {
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Function, nullptr, false, PatternDecl);
// Substitute into the qualifier; we can get a substitution failure here
// through evil use of alias templates.
// FIXME: Is CurContext correct for this? Should we go to the (instantiation
// of the) lexical context of the pattern?
SubstQualifier(*this, PatternDecl, Function, TemplateArgs);
ActOnStartOfFunctionDef(nullptr, Function);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
Sema::ContextRAII savedContext(*this, Function);
if (addInstantiatedParametersToScope(*this, Function, PatternDecl, Scope,
TemplateArgs))
return;
StmtResult Body;
if (PatternDecl->hasSkippedBody()) {
ActOnSkippedFunctionBody(Function);
Body = nullptr;
} else {
if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) {
// If this is a constructor, instantiate the member initializers.
InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl),
TemplateArgs);
// If this is an MS ABI dllexport default constructor, instantiate any
// default arguments.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
Ctor->isDefaultConstructor()) {
InstantiateDefaultCtorDefaultArgs(Ctor);
}
}
// Instantiate the function body.
Body = SubstStmt(Pattern, TemplateArgs);
if (Body.isInvalid())
Function->setInvalidDecl();
}
// FIXME: finishing the function body while in an expression evaluation
// context seems wrong. Investigate more.
ActOnFinishFunctionBody(Function, Body.get(), /*IsInstantiation=*/true);
PerformDependentDiagnostics(PatternDecl, TemplateArgs);
if (auto *Listener = getASTMutationListener())
Listener->FunctionDefinitionInstantiated(Function);
savedContext.pop();
}
DeclGroupRef DG(Function);
Consumer.HandleTopLevelDecl(DG);
// This function may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Scope.Exit();
GlobalInstantiations.perform();
}
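// Illustrative example for InstantiateFunctionDefinition above (editorial
// sketch, not part of the original source; names are invented):
//
//   template <typename T> T twice(T v) { return v + v; }
//   int n = twice(21);               // implicit instantiation of twice<int>
//   template long twice<long>(long); // explicit instantiation definition
//
// Both paths end up here: the pattern body is substituted, pending local and
// global instantiations queued during that substitution are performed, and
// the completed definition is handed to the ASTConsumer.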
VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation(
VarTemplateDecl *VarTemplate, VarDecl *FromVar,
const TemplateArgumentList &TemplateArgList,
const TemplateArgumentListInfo &TemplateArgsInfo,
SmallVectorImpl<TemplateArgument> &Converted,
SourceLocation PointOfInstantiation,
LateInstantiatedAttrVec *LateAttrs,
LocalInstantiationScope *StartingScope) {
if (FromVar->isInvalidDecl())
return nullptr;
InstantiatingTemplate Inst(*this, PointOfInstantiation, FromVar);
if (Inst.isInvalid())
return nullptr;
MultiLevelTemplateArgumentList TemplateArgLists;
TemplateArgLists.addOuterTemplateArguments(&TemplateArgList);
// Instantiate the first declaration of the variable template: for a partial
// specialization of a static data member template, the first declaration may
// or may not be the declaration in the class; if it's in the class, we want
// to instantiate a member in the class (a declaration), and if it's outside,
// we want to instantiate a definition.
//
// If we're instantiating an explicitly-specialized member template or member
// partial specialization, don't do this. The member specialization completely
// replaces the original declaration in this case.
bool IsMemberSpec = false;
if (VarTemplatePartialSpecializationDecl *PartialSpec =
dyn_cast<VarTemplatePartialSpecializationDecl>(FromVar))
IsMemberSpec = PartialSpec->isMemberSpecialization();
else if (VarTemplateDecl *FromTemplate = FromVar->getDescribedVarTemplate())
IsMemberSpec = FromTemplate->isMemberSpecialization();
if (!IsMemberSpec)
FromVar = FromVar->getFirstDecl();
MultiLevelTemplateArgumentList MultiLevelList(TemplateArgList);
TemplateDeclInstantiator Instantiator(*this, FromVar->getDeclContext(),
MultiLevelList);
// TODO: Set LateAttrs and StartingScope ...
return cast_or_null<VarTemplateSpecializationDecl>(
Instantiator.VisitVarTemplateSpecializationDecl(
VarTemplate, FromVar, TemplateArgsInfo, Converted));
}
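// For illustration, a minimal sketch of user code that reaches this path
// (the names 'pi' and 'd' are hypothetical, chosen only for the example):
//
//   template<typename T> constexpr T pi = T(3.1415926535897932385L);
//   double d = pi<double>; // requires a VarTemplateSpecializationDecl
//
// Building the specialization declaration for pi<double> happens here; its
// definition (and initializer) is instantiated separately, see
// InstantiateVariableDefinition below.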
/// Instantiates a variable template specialization by completing it
/// with appropriate type information and initializer.
VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
VarTemplateSpecializationDecl *VarSpec, VarDecl *PatternDecl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
assert(PatternDecl->isThisDeclarationADefinition() &&
"don't have a definition to instantiate from");
// Do substitution on the type of the declaration
TypeSourceInfo *DI =
SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs,
PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName());
if (!DI)
return nullptr;
// Update the type of this variable template specialization.
VarSpec->setType(DI->getType());
// Convert the declaration into a definition now.
VarSpec->setCompleteDefinition();
// Instantiate the initializer.
InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs);
if (getLangOpts().OpenCL)
deduceOpenCLAddressSpace(VarSpec);
return VarSpec;
}
/// BuildVariableInstantiation - Used after a new variable has been created.
/// Sets basic variable data and decides whether to postpone the
/// variable instantiation.
void Sema::BuildVariableInstantiation(
VarDecl *NewVar, VarDecl *OldVar,
const MultiLevelTemplateArgumentList &TemplateArgs,
LateInstantiatedAttrVec *LateAttrs, DeclContext *Owner,
LocalInstantiationScope *StartingScope,
bool InstantiatingVarTemplate,
VarTemplateSpecializationDecl *PrevDeclForVarTemplateSpecialization) {
// Instantiating a partial specialization to produce a partial
// specialization.
bool InstantiatingVarTemplatePartialSpec =
isa<VarTemplatePartialSpecializationDecl>(OldVar) &&
isa<VarTemplatePartialSpecializationDecl>(NewVar);
// Instantiating from a variable template (or partial specialization) to
// produce a variable template specialization.
bool InstantiatingSpecFromTemplate =
isa<VarTemplateSpecializationDecl>(NewVar) &&
(OldVar->getDescribedVarTemplate() ||
isa<VarTemplatePartialSpecializationDecl>(OldVar));
// If we are instantiating a local extern declaration, the
// instantiation belongs lexically to the containing function.
// If we are instantiating a static data member defined
// out-of-line, the instantiation will have the same lexical
// context (which will be a namespace scope) as the template.
if (OldVar->isLocalExternDecl()) {
NewVar->setLocalExternDecl();
NewVar->setLexicalDeclContext(Owner);
} else if (OldVar->isOutOfLine())
NewVar->setLexicalDeclContext(OldVar->getLexicalDeclContext());
NewVar->setTSCSpec(OldVar->getTSCSpec());
NewVar->setInitStyle(OldVar->getInitStyle());
NewVar->setCXXForRangeDecl(OldVar->isCXXForRangeDecl());
NewVar->setObjCForDecl(OldVar->isObjCForDecl());
NewVar->setConstexpr(OldVar->isConstexpr());
NewVar->setInitCapture(OldVar->isInitCapture());
NewVar->setPreviousDeclInSameBlockScope(
OldVar->isPreviousDeclInSameBlockScope());
NewVar->setAccess(OldVar->getAccess());
if (!OldVar->isStaticDataMember()) {
if (OldVar->isUsed(false))
NewVar->setIsUsed();
NewVar->setReferenced(OldVar->isReferenced());
}
InstantiateAttrs(TemplateArgs, OldVar, NewVar, LateAttrs, StartingScope);
LookupResult Previous(
*this, NewVar->getDeclName(), NewVar->getLocation(),
NewVar->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage
: Sema::LookupOrdinaryName,
NewVar->isLocalExternDecl() ? Sema::ForExternalRedeclaration
: forRedeclarationInCurContext());
if (NewVar->isLocalExternDecl() && OldVar->getPreviousDecl() &&
(!OldVar->getPreviousDecl()->getDeclContext()->isDependentContext() ||
OldVar->getPreviousDecl()->getDeclContext() == OldVar->getDeclContext())) {
// We have a previous declaration. Use that one, so we merge with the
// right type.
if (NamedDecl *NewPrev = FindInstantiatedDecl(
NewVar->getLocation(), OldVar->getPreviousDecl(), TemplateArgs))
Previous.addDecl(NewPrev);
} else if (!isa<VarTemplateSpecializationDecl>(NewVar) &&
OldVar->hasLinkage()) {
LookupQualifiedName(Previous, NewVar->getDeclContext(), false);
} else if (PrevDeclForVarTemplateSpecialization) {
Previous.addDecl(PrevDeclForVarTemplateSpecialization);
}
CheckVariableDeclaration(NewVar, Previous);
if (!InstantiatingVarTemplate) {
NewVar->getLexicalDeclContext()->addHiddenDecl(NewVar);
if (!NewVar->isLocalExternDecl() || !NewVar->getPreviousDecl())
NewVar->getDeclContext()->makeDeclVisibleInContext(NewVar);
}
if (!OldVar->isOutOfLine()) {
if (NewVar->getDeclContext()->isFunctionOrMethod())
CurrentInstantiationScope->InstantiatedLocal(OldVar, NewVar);
}
// Link instantiations of static data members back to the template from
// which they were instantiated.
//
// Don't do this when instantiating a template (we link the template itself
// back in that case) nor when instantiating a static data member template
// (that's not a member specialization).
if (NewVar->isStaticDataMember() && !InstantiatingVarTemplate &&
!InstantiatingSpecFromTemplate)
NewVar->setInstantiationOfStaticDataMember(OldVar,
TSK_ImplicitInstantiation);
// If the pattern is an (in-class) explicit specialization, then the result
// is also an explicit specialization.
if (VarTemplateSpecializationDecl *OldVTSD =
dyn_cast<VarTemplateSpecializationDecl>(OldVar)) {
if (OldVTSD->getSpecializationKind() == TSK_ExplicitSpecialization &&
!isa<VarTemplatePartialSpecializationDecl>(OldVTSD))
cast<VarTemplateSpecializationDecl>(NewVar)->setSpecializationKind(
TSK_ExplicitSpecialization);
}
// Forward the mangling number from the template to the instantiated decl.
Context.setManglingNumber(NewVar, Context.getManglingNumber(OldVar));
Context.setStaticLocalNumber(NewVar, Context.getStaticLocalNumber(OldVar));
// Figure out whether to eagerly instantiate the initializer.
if (InstantiatingVarTemplate || InstantiatingVarTemplatePartialSpec) {
// We're producing a template. Don't instantiate the initializer yet.
} else if (NewVar->getType()->isUndeducedType()) {
// We need the type to complete the declaration of the variable.
InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
} else if (InstantiatingSpecFromTemplate ||
(OldVar->isInline() && OldVar->isThisDeclarationADefinition() &&
!NewVar->isThisDeclarationADefinition())) {
// Delay instantiation of the initializer for variable template
// specializations or inline static data members until a definition of the
// variable is needed.
} else {
InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
}
// Diagnose unused local variables with dependent types, where the diagnostic
// will have been deferred.
if (!NewVar->isInvalidDecl() &&
NewVar->getDeclContext()->isFunctionOrMethod() &&
OldVar->getType()->isDependentType())
DiagnoseUnusedDecl(NewVar);
}
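// For illustration, a sketch of the local-extern case handled above
// (the names 'counter' and 'f' are hypothetical):
//
//   extern int counter;
//   template<typename T> void f() {
//     extern int counter; // local extern declaration
//     ++counter;
//   }
//
// The instantiated local extern declaration stays lexically inside f<T> but
// is merged, via redeclaration lookup, with the prior declaration of
// 'counter' so that both refer to the same variable.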
/// Instantiate the initializer of a variable.
void Sema::InstantiateVariableInitializer(
VarDecl *Var, VarDecl *OldVar,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (ASTMutationListener *L = getASTContext().getASTMutationListener())
L->VariableDefinitionInstantiated(Var);
// We propagate the 'inline' flag with the initializer, because it
// would otherwise imply that the variable is a definition for a
// non-static data member.
if (OldVar->isInlineSpecified())
Var->setInlineSpecified();
else if (OldVar->isInline())
Var->setImplicitlyInline();
if (OldVar->getInit()) {
EnterExpressionEvaluationContext Evaluated(
*this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var);
// Instantiate the initializer.
ExprResult Init;
{
ContextRAII SwitchContext(*this, Var->getDeclContext());
Init = SubstInitializer(OldVar->getInit(), TemplateArgs,
OldVar->getInitStyle() == VarDecl::CallInit);
}
if (!Init.isInvalid()) {
Expr *InitExpr = Init.get();
if (Var->hasAttr<DLLImportAttr>() &&
(!InitExpr ||
!InitExpr->isConstantInitializer(getASTContext(), false))) {
// Do not dynamically initialize dllimport variables.
} else if (InitExpr) {
bool DirectInit = OldVar->isDirectInit();
AddInitializerToDecl(Var, InitExpr, DirectInit);
} else
ActOnUninitializedDecl(Var);
} else {
// FIXME: Not too happy about invalidating the declaration
// because of a bogus initializer.
Var->setInvalidDecl();
}
} else {
// `inline` variables are a definition and declaration all in one; we won't
// pick up an initializer from anywhere else.
if (Var->isStaticDataMember() && !Var->isInline()) {
if (!Var->isOutOfLine())
return;
// If the declaration inside the class had an initializer, don't add
// another one to the out-of-line definition.
if (OldVar->getFirstDecl()->hasInit())
return;
}
// We'll add an initializer to a for-range declaration later.
if (Var->isCXXForRangeDecl() || Var->isObjCForDecl())
return;
ActOnUninitializedDecl(Var);
}
if (getLangOpts().CUDA)
checkAllowedCUDAInitializer(Var);
}
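// For illustration, a sketch of the static-data-member case skipped above
// (the names 'S' and 'value' are hypothetical):
//
//   template<typename T> struct S {
//     static const int value = sizeof(T); // in-class initializer
//   };
//   template<typename T> const int S<T>::value; // out-of-line definition
//
// When the out-of-line definition of S<int>::value is instantiated, the
// initializer already comes from the in-class declaration, so no second
// initializer is attached to the definition.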
/// Instantiate the definition of the given variable from its
/// template.
///
/// \param PointOfInstantiation the point at which the instantiation was
/// required. Note that this is not precisely a "point of instantiation"
/// for the variable, but it's close.
///
/// \param Var the already-instantiated declaration of a templated variable.
///
/// \param Recursive if true, recursively instantiates any functions that
/// are required by this instantiation.
///
/// \param DefinitionRequired if true, then we are performing an explicit
/// instantiation where a definition of the variable is required. Complain
/// if there is no such definition.
void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
VarDecl *Var, bool Recursive,
bool DefinitionRequired, bool AtEndOfTU) {
if (Var->isInvalidDecl())
return;
// Never instantiate an explicitly-specialized entity.
TemplateSpecializationKind TSK =
Var->getTemplateSpecializationKindForInstantiation();
if (TSK == TSK_ExplicitSpecialization)
return;
// Find the pattern and the arguments to substitute into it.
VarDecl *PatternDecl = Var->getTemplateInstantiationPattern();
assert(PatternDecl && "no pattern for templated variable");
MultiLevelTemplateArgumentList TemplateArgs =
getTemplateInstantiationArgs(Var);
VarTemplateSpecializationDecl *VarSpec =
dyn_cast<VarTemplateSpecializationDecl>(Var);
if (VarSpec) {
// If this is a static data member template, there might be an
// uninstantiated initializer on the declaration. If so, instantiate
// it now.
//
// FIXME: This largely duplicates what we would do below. The difference
// is that along this path we may instantiate an initializer from an
// in-class declaration of the template and instantiate the definition
// from a separate out-of-class definition.
if (PatternDecl->isStaticDataMember() &&
(PatternDecl = PatternDecl->getFirstDecl())->hasInit() &&
!Var->hasInit()) {
// FIXME: Factor out the duplicated instantiation context setup/tear down
// code here.
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable initializer");
// The instantiation is visible here, even if it was first declared in an
// unimported module.
Var->setVisibleDespiteOwningModule();
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate
// later, while we're still within our own instantiation context.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
LocalInstantiationScope Local(*this);
LocalEagerInstantiationScope LocalInstantiations(*this);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
ContextRAII PreviousContext(*this, Var->getDeclContext());
InstantiateVariableInitializer(Var, PatternDecl, TemplateArgs);
PreviousContext.pop();
// This variable may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Local.Exit();
GlobalInstantiations.perform();
}
} else {
assert(Var->isStaticDataMember() && PatternDecl->isStaticDataMember() &&
"not a static data member?");
}
VarDecl *Def = PatternDecl->getDefinition(getASTContext());
// If we don't have a definition of the variable template, we won't perform
// any instantiation. Rather, we rely on the user to instantiate this
// definition (or provide a specialization for it) in another translation
// unit.
if (!Def && !DefinitionRequired) {
if (TSK == TSK_ExplicitInstantiationDefinition) {
PendingInstantiations.push_back(
std::make_pair(Var, PointOfInstantiation));
} else if (TSK == TSK_ImplicitInstantiation) {
// Warn about missing definition at the end of translation unit.
if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() &&
!getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) {
Diag(PointOfInstantiation, diag::warn_var_template_missing)
<< Var;
Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
if (getLangOpts().CPlusPlus11)
Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Var;
}
return;
}
}
// FIXME: We need to track the instantiation stack in order to know which
// definitions should be visible within this instantiation.
// FIXME: Produce diagnostics when Var->getInstantiatedFromStaticDataMember().
if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Var,
/*InstantiatedFromMember*/false,
PatternDecl, Def, TSK,
/*Complain*/DefinitionRequired))
return;
// C++11 [temp.explicit]p10:
// Except for inline functions, const variables of literal types, variables
// of reference types, [...] explicit instantiation declarations
// have the effect of suppressing the implicit instantiation of the entity
// to which they refer.
//
// FIXME: That's not exactly the same as "might be usable in constant
// expressions", which only allows constexpr variables and const integral
// types, not arbitrary const literal types.
if (TSK == TSK_ExplicitInstantiationDeclaration &&
!Var->mightBeUsableInConstantExpressions(getASTContext()))
return;
// Make sure to pass the instantiated variable to the consumer at the end.
struct PassToConsumerRAII {
ASTConsumer &Consumer;
VarDecl *Var;
PassToConsumerRAII(ASTConsumer &Consumer, VarDecl *Var)
: Consumer(Consumer), Var(Var) { }
~PassToConsumerRAII() {
Consumer.HandleCXXStaticMemberVarInstantiation(Var);
}
} PassToConsumerRAII(Consumer, Var);
// If we already have a definition, we're done.
if (VarDecl *Def = Var->getDefinition()) {
// We may be explicitly instantiating something we've already implicitly
// instantiated.
Def->setTemplateSpecializationKind(Var->getTemplateSpecializationKind(),
PointOfInstantiation);
return;
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable definition");
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate later,
// while we're still within our own instantiation context.
GlobalEagerInstantiationScope GlobalInstantiations(*this,
/*Enabled=*/Recursive);
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
ContextRAII PreviousContext(*this, Var->getDeclContext());
LocalInstantiationScope Local(*this);
LocalEagerInstantiationScope LocalInstantiations(*this);
VarDecl *OldVar = Var;
if (Def->isStaticDataMember() && !Def->isOutOfLine()) {
// We're instantiating an inline static data member whose definition was
// provided inside the class.
InstantiateVariableInitializer(Var, Def, TemplateArgs);
} else if (!VarSpec) {
Var = cast_or_null<VarDecl>(SubstDecl(Def, Var->getDeclContext(),
TemplateArgs));
} else if (Var->isStaticDataMember() &&
Var->getLexicalDeclContext()->isRecord()) {
// We need to instantiate the definition of a static data member template,
// and all we have is the in-class declaration of it. Instantiate a separate
// declaration of the definition.
TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(),
TemplateArgs);
Var = cast_or_null<VarDecl>(Instantiator.VisitVarTemplateSpecializationDecl(
VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(),
VarSpec->getTemplateArgs().asArray(), VarSpec));
if (Var) {
llvm::PointerUnion<VarTemplateDecl *,
VarTemplatePartialSpecializationDecl *> PatternPtr =
VarSpec->getSpecializedTemplateOrPartial();
if (VarTemplatePartialSpecializationDecl *Partial =
PatternPtr.dyn_cast<VarTemplatePartialSpecializationDecl *>())
cast<VarTemplateSpecializationDecl>(Var)->setInstantiationOf(
Partial, &VarSpec->getTemplateInstantiationArgs());
// Attach the initializer.
InstantiateVariableInitializer(Var, Def, TemplateArgs);
}
} else
// Complete the existing variable's definition with an appropriately
// substituted type and initializer.
Var = CompleteVarTemplateSpecializationDecl(VarSpec, Def, TemplateArgs);
PreviousContext.pop();
if (Var) {
PassToConsumerRAII.Var = Var;
Var->setTemplateSpecializationKind(OldVar->getTemplateSpecializationKind(),
OldVar->getPointOfInstantiation());
}
// This variable may have local implicit instantiations that need to be
// instantiated within this scope.
LocalInstantiations.perform();
Local.Exit();
GlobalInstantiations.perform();
}
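// For illustration, a sketch of the [temp.explicit]p10 case checked above
// (the names 'global' and 'p' are hypothetical):
//
//   template<typename T> T global = T();
//   extern template int global<int>; // explicit instantiation declaration
//   int *p = &global<int>;           // does not instantiate the definition
//
// Since global<int> cannot be used in a constant expression, the explicit
// instantiation declaration suppresses instantiation of its definition; an
// explicit instantiation definition in some other translation unit must
// provide it.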
void
Sema::InstantiateMemInitializers(CXXConstructorDecl *New,
const CXXConstructorDecl *Tmpl,
const MultiLevelTemplateArgumentList &TemplateArgs) {
SmallVector<CXXCtorInitializer*, 4> NewInits;
bool AnyErrors = Tmpl->isInvalidDecl();
// Instantiate all the initializers.
for (const auto *Init : Tmpl->inits()) {
// Only instantiate written initializers, let Sema re-construct implicit
// ones.
if (!Init->isWritten())
continue;
SourceLocation EllipsisLoc;
if (Init->isPackExpansion()) {
// This is a pack expansion. We should expand it now.
TypeLoc BaseTL = Init->getTypeSourceInfo()->getTypeLoc();
SmallVector<UnexpandedParameterPack, 4> Unexpanded;
collectUnexpandedParameterPacks(BaseTL, Unexpanded);
collectUnexpandedParameterPacks(Init->getInit(), Unexpanded);
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (CheckParameterPacksForExpansion(Init->getEllipsisLoc(),
BaseTL.getSourceRange(),
Unexpanded,
TemplateArgs, ShouldExpand,
RetainExpansion,
NumExpansions)) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
assert(ShouldExpand && "Partial instantiation of base initializer?");
// Loop over all of the arguments in the argument pack(s),
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(*this, I);
// Instantiate the initializer.
ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs,
/*CXXDirectInit=*/true);
if (TempInit.isInvalid()) {
AnyErrors = true;
break;
}
// Instantiate the base type.
TypeSourceInfo *BaseTInfo = SubstType(Init->getTypeSourceInfo(),
TemplateArgs,
Init->getSourceLocation(),
New->getDeclName());
if (!BaseTInfo) {
AnyErrors = true;
break;
}
// Build the initializer.
MemInitResult NewInit = BuildBaseInitializer(BaseTInfo->getType(),
BaseTInfo, TempInit.get(),
New->getParent(),
SourceLocation());
if (NewInit.isInvalid()) {
AnyErrors = true;
break;
}
NewInits.push_back(NewInit.get());
}
continue;
}
// Instantiate the initializer.
ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs,
/*CXXDirectInit=*/true);
if (TempInit.isInvalid()) {
AnyErrors = true;
continue;
}
MemInitResult NewInit;
if (Init->isDelegatingInitializer() || Init->isBaseInitializer()) {
TypeSourceInfo *TInfo = SubstType(Init->getTypeSourceInfo(),
TemplateArgs,
Init->getSourceLocation(),
New->getDeclName());
if (!TInfo) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
if (Init->isBaseInitializer())
NewInit = BuildBaseInitializer(TInfo->getType(), TInfo, TempInit.get(),
New->getParent(), EllipsisLoc);
else
NewInit = BuildDelegatingInitializer(TInfo, TempInit.get(),
cast<CXXRecordDecl>(CurContext->getParent()));
} else if (Init->isMemberInitializer()) {
FieldDecl *Member = cast_or_null<FieldDecl>(FindInstantiatedDecl(
Init->getMemberLocation(),
Init->getMember(),
TemplateArgs));
if (!Member) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
NewInit = BuildMemberInitializer(Member, TempInit.get(),
Init->getSourceLocation());
} else if (Init->isIndirectMemberInitializer()) {
IndirectFieldDecl *IndirectMember =
cast_or_null<IndirectFieldDecl>(FindInstantiatedDecl(
Init->getMemberLocation(),
Init->getIndirectMember(), TemplateArgs));
if (!IndirectMember) {
AnyErrors = true;
New->setInvalidDecl();
continue;
}
NewInit = BuildMemberInitializer(IndirectMember, TempInit.get(),
Init->getSourceLocation());
}
if (NewInit.isInvalid()) {
AnyErrors = true;
New->setInvalidDecl();
} else {
NewInits.push_back(NewInit.get());
}
}
// Assign all the initializers to the new constructor.
ActOnMemInitializers(New,
/*FIXME: ColonLoc */
SourceLocation(),
NewInits,
AnyErrors);
}
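// For illustration, a sketch of the pack-expansion path above (the names
// 'D' and 'bases' are hypothetical):
//
//   template<typename ...Bases> struct D : Bases... {
//     D(const Bases &...bases) : Bases(bases)... {}
//   };
//
// Instantiating D<A, B>::D expands the single written initializer
// 'Bases(bases)...' into one base initializer per element of the pack.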
// TODO: this could be templated if the various decl types used the
// same method name.
static bool isInstantiationOf(ClassTemplateDecl *Pattern,
ClassTemplateDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberTemplate();
} while (Instance);
return false;
}
static bool isInstantiationOf(FunctionTemplateDecl *Pattern,
FunctionTemplateDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberTemplate();
} while (Instance);
return false;
}
static bool
isInstantiationOf(ClassTemplatePartialSpecializationDecl *Pattern,
ClassTemplatePartialSpecializationDecl *Instance) {
Pattern
= cast<ClassTemplatePartialSpecializationDecl>(Pattern->getCanonicalDecl());
do {
Instance = cast<ClassTemplatePartialSpecializationDecl>(
Instance->getCanonicalDecl());
if (Pattern == Instance)
return true;
Instance = Instance->getInstantiatedFromMember();
} while (Instance);
return false;
}
static bool isInstantiationOf(CXXRecordDecl *Pattern,
CXXRecordDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberClass();
} while (Instance);
return false;
}
static bool isInstantiationOf(FunctionDecl *Pattern,
FunctionDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberFunction();
} while (Instance);
return false;
}
static bool isInstantiationOf(EnumDecl *Pattern,
EnumDecl *Instance) {
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromMemberEnum();
} while (Instance);
return false;
}
static bool isInstantiationOf(UsingShadowDecl *Pattern,
UsingShadowDecl *Instance,
ASTContext &C) {
return declaresSameEntity(C.getInstantiatedFromUsingShadowDecl(Instance),
Pattern);
}
static bool isInstantiationOf(UsingDecl *Pattern, UsingDecl *Instance,
ASTContext &C) {
return declaresSameEntity(C.getInstantiatedFromUsingDecl(Instance), Pattern);
}
template<typename T>
static bool isInstantiationOfUnresolvedUsingDecl(T *Pattern, Decl *Other,
ASTContext &Ctx) {
// An unresolved using declaration can instantiate to an unresolved using
// declaration, or to a using declaration or a using declaration pack.
//
// Multiple declarations can claim to be instantiated from an unresolved
// using declaration if it's a pack expansion. We want the UsingPackDecl
// in that case, not the individual UsingDecls within the pack.
bool OtherIsPackExpansion;
NamedDecl *OtherFrom;
if (auto *OtherUUD = dyn_cast<T>(Other)) {
OtherIsPackExpansion = OtherUUD->isPackExpansion();
OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUUD);
} else if (auto *OtherUPD = dyn_cast<UsingPackDecl>(Other)) {
OtherIsPackExpansion = true;
OtherFrom = OtherUPD->getInstantiatedFromUsingDecl();
} else if (auto *OtherUD = dyn_cast<UsingDecl>(Other)) {
OtherIsPackExpansion = false;
OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUD);
} else {
return false;
}
return Pattern->isPackExpansion() == OtherIsPackExpansion &&
declaresSameEntity(OtherFrom, Pattern);
}
static bool isInstantiationOfStaticDataMember(VarDecl *Pattern,
VarDecl *Instance) {
assert(Instance->isStaticDataMember());
Pattern = Pattern->getCanonicalDecl();
do {
Instance = Instance->getCanonicalDecl();
if (Pattern == Instance) return true;
Instance = Instance->getInstantiatedFromStaticDataMember();
} while (Instance);
return false;
}
// Other is the prospective instantiation
// D is the prospective pattern
static bool isInstantiationOf(ASTContext &Ctx, NamedDecl *D, Decl *Other) {
if (auto *UUD = dyn_cast<UnresolvedUsingTypenameDecl>(D))
return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx);
if (auto *UUD = dyn_cast<UnresolvedUsingValueDecl>(D))
return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx);
if (D->getKind() != Other->getKind())
return false;
if (auto *Record = dyn_cast<CXXRecordDecl>(Other))
return isInstantiationOf(cast<CXXRecordDecl>(D), Record);
if (auto *Function = dyn_cast<FunctionDecl>(Other))
return isInstantiationOf(cast<FunctionDecl>(D), Function);
if (auto *Enum = dyn_cast<EnumDecl>(Other))
return isInstantiationOf(cast<EnumDecl>(D), Enum);
if (auto *Var = dyn_cast<VarDecl>(Other))
if (Var->isStaticDataMember())
return isInstantiationOfStaticDataMember(cast<VarDecl>(D), Var);
if (auto *Temp = dyn_cast<ClassTemplateDecl>(Other))
return isInstantiationOf(cast<ClassTemplateDecl>(D), Temp);
if (auto *Temp = dyn_cast<FunctionTemplateDecl>(Other))
return isInstantiationOf(cast<FunctionTemplateDecl>(D), Temp);
if (auto *PartialSpec =
dyn_cast<ClassTemplatePartialSpecializationDecl>(Other))
return isInstantiationOf(cast<ClassTemplatePartialSpecializationDecl>(D),
PartialSpec);
if (auto *Field = dyn_cast<FieldDecl>(Other)) {
if (!Field->getDeclName()) {
// This is an unnamed field.
return declaresSameEntity(Ctx.getInstantiatedFromUnnamedFieldDecl(Field),
cast<FieldDecl>(D));
}
}
if (auto *Using = dyn_cast<UsingDecl>(Other))
return isInstantiationOf(cast<UsingDecl>(D), Using, Ctx);
if (auto *Shadow = dyn_cast<UsingShadowDecl>(Other))
return isInstantiationOf(cast<UsingShadowDecl>(D), Shadow, Ctx);
return D->getDeclName() &&
D->getDeclName() == cast<NamedDecl>(Other)->getDeclName();
}
template<typename ForwardIterator>
static NamedDecl *findInstantiationOf(ASTContext &Ctx,
NamedDecl *D,
ForwardIterator first,
ForwardIterator last) {
for (; first != last; ++first)
if (isInstantiationOf(Ctx, D, *first))
return cast<NamedDecl>(*first);
return nullptr;
}
/// Finds the instantiation of the given declaration context
/// within the current instantiation.
///
/// \returns NULL if there was an error
DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC,
const MultiLevelTemplateArgumentList &TemplateArgs) {
if (NamedDecl *D = dyn_cast<NamedDecl>(DC)) {
Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs, true);
return cast_or_null<DeclContext>(ID);
} else return DC;
}
/// Determine whether the given context is dependent on template parameters at
/// level \p Level or below.
///
/// Sometimes we only substitute an inner set of template arguments and leave
/// the outer templates alone. In such cases, contexts dependent only on the
/// outer levels are not effectively dependent.
static bool isDependentContextAtLevel(DeclContext *DC, unsigned Level) {
if (!DC->isDependentContext())
return false;
if (!Level)
return true;
return cast<Decl>(DC)->getTemplateDepth() > Level;
}
/// Find the instantiation of the given declaration within the
/// current instantiation.
///
/// This routine is intended to be used when \p D is a declaration
/// referenced from within a template that needs to be mapped into the
/// corresponding declaration within an instantiation. For example,
/// given:
///
/// \code
/// template<typename T>
/// struct X {
/// enum Kind {
/// KnownValue = sizeof(T)
/// };
///
/// bool getKind() const { return KnownValue; }
/// };
///
/// template struct X<int>;
/// \endcode
///
/// In the instantiation of X<int>::getKind(), we need to map the \p
/// EnumConstantDecl for \p KnownValue (which refers to
/// X<T>::<Kind>::KnownValue) to its instantiation (X<int>::<Kind>::KnownValue).
/// \p FindInstantiatedDecl performs this mapping from within the instantiation
/// of X<int>.
NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
const MultiLevelTemplateArgumentList &TemplateArgs,
bool FindingInstantiatedContext) {
DeclContext *ParentDC = D->getDeclContext();
// Determine whether our parent context depends on any of the template
// arguments we're currently substituting.
bool ParentDependsOnArgs = isDependentContextAtLevel(
ParentDC, TemplateArgs.getNumRetainedOuterLevels());
// FIXME: Parameters of pointers to functions (y below) that are themselves
// parameters (p below) can have their ParentDC set to the translation unit
// - thus we cannot consistently check whether the ParentDC of such a
// parameter is dependent and/or a FunctionOrMethod.
// For example, in the code below, template argument deduction tries to
// find an instantiated decl for (T y) when the ParentDC for y is
// the translation unit.
// e.g. template <class T> void Foo(auto (*p)(T y) -> decltype(y())) {}
// float baz(float(*)()) { return 0.0; }
// Foo(baz);
// The better fix here is perhaps to ensure that a ParmVarDecl, by the time
// it gets here, always has a FunctionOrMethod as its ParentDC??
// For now:
// - as long as we have a ParmVarDecl whose parent is non-dependent and
// whose type is not instantiation dependent, do nothing to the decl
// - otherwise find its instantiated decl.
if (isa<ParmVarDecl>(D) && !ParentDependsOnArgs &&
!cast<ParmVarDecl>(D)->getType()->isInstantiationDependentType())
return D;
if (isa<ParmVarDecl>(D) || isa<NonTypeTemplateParmDecl>(D) ||
isa<TemplateTypeParmDecl>(D) || isa<TemplateTemplateParmDecl>(D) ||
(ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() ||
isa<OMPDeclareReductionDecl>(ParentDC) ||
isa<OMPDeclareMapperDecl>(ParentDC))) ||
(isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) {
// D is a local of some kind. Look into the map of local
// declarations to their instantiations.
if (CurrentInstantiationScope) {
if (auto Found = CurrentInstantiationScope->findInstantiationOf(D)) {
if (Decl *FD = Found->dyn_cast<Decl *>())
return cast<NamedDecl>(FD);
int PackIdx = ArgumentPackSubstitutionIndex;
assert(PackIdx != -1 &&
"found declaration pack but not pack expanding");
typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack;
return cast<NamedDecl>((*Found->get<DeclArgumentPack *>())[PackIdx]);
}
}
// If we're performing a partial substitution during template argument
// deduction, we may not have values for template parameters yet. They
// just map to themselves.
if (isa<NonTypeTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) ||
isa<TemplateTemplateParmDecl>(D))
return D;
if (D->isInvalidDecl())
return nullptr;
// Normally this function only searches for already-instantiated declarations;
// however, we have to make an exception for local types used before their
// definition, as in the code:
//
// template<typename T> void f1() {
// void g1(struct x1);
// struct x1 {};
// }
//
// In this case instantiation of the type of 'g1' requires definition of
// 'x1', which is defined later. Error recovery may produce an enum used
// before definition. In these cases we need to instantiate relevant
// declarations here.
bool NeedInstantiate = false;
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D))
NeedInstantiate = RD->isLocalClass();
else if (isa<TypedefNameDecl>(D) &&
isa<CXXDeductionGuideDecl>(D->getDeclContext()))
NeedInstantiate = true;
else
NeedInstantiate = isa<EnumDecl>(D);
if (NeedInstantiate) {
Decl *Inst = SubstDecl(D, CurContext, TemplateArgs);
CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return cast<TypeDecl>(Inst);
}
// If we didn't find the decl, then we must have a label decl that hasn't
// been found yet. Lazily instantiate it and return it now.
assert(isa<LabelDecl>(D));
Decl *Inst = SubstDecl(D, CurContext, TemplateArgs);
assert(Inst && "Failed to instantiate label??");
CurrentInstantiationScope->InstantiatedLocal(D, Inst);
return cast<LabelDecl>(Inst);
}
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(D)) {
if (!Record->isDependentContext())
return D;
// Determine whether this record is the "templated" declaration describing
// a class template or class template partial specialization.
ClassTemplateDecl *ClassTemplate = Record->getDescribedClassTemplate();
if (ClassTemplate)
ClassTemplate = ClassTemplate->getCanonicalDecl();
else if (ClassTemplatePartialSpecializationDecl *PartialSpec
= dyn_cast<ClassTemplatePartialSpecializationDecl>(Record))
ClassTemplate = PartialSpec->getSpecializedTemplate()->getCanonicalDecl();
// Walk the current context to find either the record or an instantiation of
// it.
DeclContext *DC = CurContext;
while (!DC->isFileContext()) {
// If we're performing substitution while we're inside the template
// definition, we'll find our own context. We're done.
if (DC->Equals(Record))
return Record;
if (CXXRecordDecl *InstRecord = dyn_cast<CXXRecordDecl>(DC)) {
// Check whether we're in the process of instantiating a class template
// specialization of the template we're mapping.
if (ClassTemplateSpecializationDecl *InstSpec
= dyn_cast<ClassTemplateSpecializationDecl>(InstRecord)){
ClassTemplateDecl *SpecTemplate = InstSpec->getSpecializedTemplate();
if (ClassTemplate && isInstantiationOf(ClassTemplate, SpecTemplate))
return InstRecord;
}
// Check whether we're in the process of instantiating a member class.
if (isInstantiationOf(Record, InstRecord))
return InstRecord;
}
// Move to the outer template scope.
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(DC)) {
if (FD->getFriendObjectKind() && FD->getDeclContext()->isFileContext()){
DC = FD->getLexicalDeclContext();
continue;
}
// An implicit deduction guide acts as if it's within the class template
// specialization described by its name and first N template params.
auto *Guide = dyn_cast<CXXDeductionGuideDecl>(FD);
if (Guide && Guide->isImplicit()) {
TemplateDecl *TD = Guide->getDeducedTemplate();
// Convert the arguments to an "as-written" list.
TemplateArgumentListInfo Args(Loc, Loc);
for (TemplateArgument Arg : TemplateArgs.getInnermost().take_front(
TD->getTemplateParameters()->size())) {
ArrayRef<TemplateArgument> Unpacked(Arg);
if (Arg.getKind() == TemplateArgument::Pack)
Unpacked = Arg.pack_elements();
for (TemplateArgument UnpackedArg : Unpacked)
Args.addArgument(
getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc));
}
QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args);
if (T.isNull())
return nullptr;
auto *SubstRecord = T->getAsCXXRecordDecl();
assert(SubstRecord && "class template id not a class type?");
// Check that this template-id names the primary template and not a
// partial or explicit specialization. (In the latter cases, it's
// meaningless to attempt to find an instantiation of D within the
// specialization.)
// FIXME: The standard doesn't say what should happen here.
if (FindingInstantiatedContext &&
usesPartialOrExplicitSpecialization(
Loc, cast<ClassTemplateSpecializationDecl>(SubstRecord))) {
Diag(Loc, diag::err_specialization_not_primary_template)
<< T << (SubstRecord->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization);
return nullptr;
}
DC = SubstRecord;
continue;
}
}
DC = DC->getParent();
}
// Fall through to deal with other dependent record types (e.g.,
// anonymous unions in class templates).
}
if (!ParentDependsOnArgs)
return D;
ParentDC = FindInstantiatedContext(Loc, ParentDC, TemplateArgs);
if (!ParentDC)
return nullptr;
if (ParentDC != D->getDeclContext()) {
// We performed some kind of instantiation in the parent context,
// so now we need to look into the instantiated parent context to
// find the instantiation of the declaration D.
// If our context used to be dependent, we may need to instantiate
// it before performing lookup into that context.
bool IsBeingInstantiated = false;
if (CXXRecordDecl *Spec = dyn_cast<CXXRecordDecl>(ParentDC)) {
if (!Spec->isDependentContext()) {
QualType T = Context.getTypeDeclType(Spec);
const RecordType *Tag = T->getAs<RecordType>();
assert(Tag && "type of non-dependent record is not a RecordType");
if (Tag->isBeingDefined())
IsBeingInstantiated = true;
if (!Tag->isBeingDefined() &&
RequireCompleteType(Loc, T, diag::err_incomplete_type))
return nullptr;
ParentDC = Tag->getDecl();
}
}
NamedDecl *Result = nullptr;
// FIXME: If the name is a dependent name, this lookup won't necessarily
// find it. Does that ever matter?
if (auto Name = D->getDeclName()) {
DeclarationNameInfo NameInfo(Name, D->getLocation());
DeclarationNameInfo NewNameInfo =
SubstDeclarationNameInfo(NameInfo, TemplateArgs);
Name = NewNameInfo.getName();
if (!Name)
return nullptr;
DeclContext::lookup_result Found = ParentDC->lookup(Name);
Result = findInstantiationOf(Context, D, Found.begin(), Found.end());
} else {
// Since we don't have a name for the entity we're looking for,
// our only option is to walk through all of the declarations to
// find that name. This will occur in a few cases:
//
// - anonymous struct/union within a template
// - unnamed class/struct/union/enum within a template
//
// FIXME: Find a better way to find these instantiations!
Result = findInstantiationOf(Context, D,
ParentDC->decls_begin(),
ParentDC->decls_end());
}
if (!Result) {
if (isa<UsingShadowDecl>(D)) {
// UsingShadowDecls can instantiate to nothing because of using hiding.
} else if (hasUncompilableErrorOccurred()) {
// We've already complained about some ill-formed code, so most likely
// this declaration failed to instantiate. There's no point in
// complaining further, since this is normal in invalid code.
// FIXME: Use more fine-grained 'invalid' tracking for this.
} else if (IsBeingInstantiated) {
// The class in which this member exists is currently being
// instantiated, and we haven't gotten around to instantiating this
// member yet. This can happen when the code uses forward declarations
// of member classes, and introduces ordering dependencies via
// template instantiation.
Diag(Loc, diag::err_member_not_yet_instantiated)
<< D->getDeclName()
<< Context.getTypeDeclType(cast<CXXRecordDecl>(ParentDC));
Diag(D->getLocation(), diag::note_non_instantiated_member_here);
} else if (EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) {
// This enumeration constant was found when the template was defined,
// but can't be found in the instantiation. This can happen if an
// unscoped enumeration member is explicitly specialized.
EnumDecl *Enum = cast<EnumDecl>(ED->getLexicalDeclContext());
EnumDecl *Spec = cast<EnumDecl>(FindInstantiatedDecl(Loc, Enum,
TemplateArgs));
assert(Spec->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization);
Diag(Loc, diag::err_enumerator_does_not_exist)
<< D->getDeclName()
<< Context.getTypeDeclType(cast<TypeDecl>(Spec->getDeclContext()));
Diag(Spec->getLocation(), diag::note_enum_specialized_here)
<< Context.getTypeDeclType(Spec);
} else {
// We should have found something, but didn't.
llvm_unreachable("Unable to find instantiation of declaration!");
}
}
D = Result;
}
return D;
}
/// Performs template instantiation for all implicit template
/// instantiations we have seen until this point.
void Sema::PerformPendingInstantiations(bool LocalOnly) {
std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
while (!PendingLocalImplicitInstantiations.empty() ||
(!LocalOnly && !PendingInstantiations.empty())) {
PendingImplicitInstantiation Inst;
if (PendingLocalImplicitInstantiations.empty()) {
Inst = PendingInstantiations.front();
PendingInstantiations.pop_front();
} else {
Inst = PendingLocalImplicitInstantiations.front();
PendingLocalImplicitInstantiations.pop_front();
}
// Instantiate function definitions
if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDefinition;
if (Function->isMultiVersion()) {
getASTContext().forEachMultiversionedFunctionVersion(
Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
DefinitionRequired, true);
if (CurFD->isDefined())
CurFD->setInstantiationIsPending(false);
});
} else {
InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
DefinitionRequired, true);
if (Function->isDefined())
Function->setInstantiationIsPending(false);
}
// Definition of a PCH-ed template declaration may be available only in the TU.
if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
TUKind == TU_Prefix && Function->instantiationIsPending())
delayedPCHInstantiations.push_back(Inst);
continue;
}
// Instantiate variable definitions
VarDecl *Var = cast<VarDecl>(Inst.first);
assert((Var->isStaticDataMember() ||
isa<VarTemplateSpecializationDecl>(Var)) &&
"Not a static data member, nor a variable template"
" specialization?");
// Don't try to instantiate declarations if the most recent redeclaration
// is invalid.
if (Var->getMostRecentDecl()->isInvalidDecl())
continue;
// Check if the most recent declaration has changed the specialization kind
// and removed the need for implicit instantiation.
switch (Var->getMostRecentDecl()
->getTemplateSpecializationKindForInstantiation()) {
case TSK_Undeclared:
llvm_unreachable("Cannot instantitiate an undeclared specialization.");
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitSpecialization:
continue; // No longer need to instantiate this type.
case TSK_ExplicitInstantiationDefinition:
// We only need an instantiation if the pending instantiation *is* the
// explicit instantiation.
if (Var != Var->getMostRecentDecl())
continue;
break;
case TSK_ImplicitInstantiation:
break;
}
PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
"instantiating variable definition");
bool DefinitionRequired = Var->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDefinition;
// Instantiate static data member definitions or variable template
// specializations.
InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true,
DefinitionRequired, true);
}
if (!LocalOnly && LangOpts.PCHInstantiateTemplates)
PendingInstantiations.swap(delayedPCHInstantiations);
}
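// For illustration, a sketch of what typically ends up in this queue (the
// names 'twice' and 'n' are hypothetical):
//
//   template<typename T> T twice(T v) { return v + v; }
//   int n = twice(21); // implicit instantiation of twice<int>
//
// The definition of twice<int> is added to PendingInstantiations at the point
// of use and instantiated here, usually at the end of the translation unit.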
void Sema::PerformDependentDiagnostics(const DeclContext *Pattern,
const MultiLevelTemplateArgumentList &TemplateArgs) {
for (auto DD : Pattern->ddiags()) {
switch (DD->getKind()) {
case DependentDiagnostic::Access:
HandleDependentAccessCheck(*DD, TemplateArgs);
break;
}
}
}
diff --git a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
index 70ba631dbfc6..d8a5b6ad4f94 100644
--- a/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
+++ b/contrib/llvm-project/clang/lib/Sema/TreeTransform.h
@@ -1,14760 +1,14760 @@
//===------- TreeTransform.h - Semantic Tree Transformation -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This file implements a semantic tree transformation that takes a given
// AST and rebuilds it, possibly transforming some nodes in the process.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
#define LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
#include "CoroutineStmtBuilder.h"
#include "TypeLocBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/DiagnosticParse.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Sema/Designator.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
using namespace llvm::omp;
namespace clang {
using namespace sema;
/// A semantic tree transformation that allows one to transform one
/// abstract syntax tree into another.
///
/// A new tree transformation is defined by creating a new subclass \c X of
/// \c TreeTransform<X> and then overriding certain operations to provide
/// behavior specific to that transformation. For example, template
/// instantiation is implemented as a tree transformation where the
/// transformation of TemplateTypeParmType nodes involves substituting the
/// template arguments for their corresponding template parameters; a similar
/// transformation is performed for non-type template parameters and
/// template template parameters.
///
/// This tree-transformation template uses static polymorphism to allow
/// subclasses to customize any of its operations. Thus, a subclass can
/// override any of the transformation or rebuild operators by providing an
/// operation with the same signature as the default implementation. The
/// overriding function should not be virtual.
///
/// Semantic tree transformations are split into two stages, either of which
/// can be replaced by a subclass. The "transform" step transforms an AST node
/// or the parts of an AST node using the various transformation functions,
/// then passes the pieces on to the "rebuild" step, which constructs a new AST
/// node of the appropriate kind from the pieces. The default transformation
/// routines recursively transform the operands to composite AST nodes (e.g.,
/// the pointee type of a PointerType node) and, if any of those operand nodes
/// were changed by the transformation, invokes the rebuild operation to create
/// a new AST node.
///
/// Subclasses can customize the transformation at various levels. The
/// most coarse-grained transformations involve replacing TransformType(),
/// TransformExpr(), TransformDecl(), TransformNestedNameSpecifierLoc(),
/// TransformTemplateName(), or TransformTemplateArgument() with entirely
/// new implementations.
///
/// For more fine-grained transformations, subclasses can replace any of the
/// \c TransformXXX functions (where XXX is the name of an AST node, e.g.,
/// PointerType, StmtExpr) to alter the transformation. As mentioned previously,
/// replacing TransformTemplateTypeParmType() allows template instantiation
/// to substitute template arguments for their corresponding template
/// parameters. Additionally, subclasses can override the \c RebuildXXX
/// functions to control how AST nodes are rebuilt when their operands change.
/// By default, \c TreeTransform will invoke semantic analysis to rebuild
/// AST nodes. However, certain other tree transformations (e.g., cloning) may
/// be able to use more efficient rebuild steps.
///
/// There are a handful of other functions that can be overridden, allowing one
/// to avoid traversing nodes that don't need any transformation
/// (\c AlreadyTransformed()), force rebuilding AST nodes even when their
/// operands have not changed (\c AlwaysRebuild()), and customize the
/// default locations and entity names used for type-checking
/// (\c getBaseLocation(), \c getBaseEntity()).
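///
/// For illustration only, a minimal subclass might look like the following
/// (the name IdentityTransform and its behavior are hypothetical):
///
/// \code
/// class IdentityTransform : public TreeTransform<IdentityTransform> {
/// public:
///   IdentityTransform(Sema &S) : TreeTransform<IdentityTransform>(S) {}
///
///   // Force every node to be rebuilt, even if none of its operands changed.
///   bool AlwaysRebuild() { return true; }
/// };
/// \endcode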
template<typename Derived>
class TreeTransform {
/// Private RAII object that helps us forget and then re-remember
/// the template argument corresponding to a partially-substituted parameter
/// pack.
class ForgetPartiallySubstitutedPackRAII {
Derived &Self;
TemplateArgument Old;
public:
ForgetPartiallySubstitutedPackRAII(Derived &Self) : Self(Self) {
Old = Self.ForgetPartiallySubstitutedPack();
}
~ForgetPartiallySubstitutedPackRAII() {
Self.RememberPartiallySubstitutedPack(Old);
}
};
protected:
Sema &SemaRef;
/// The set of local declarations that have been transformed, for
/// cases where we are forced to build new declarations within the transformer
/// rather than in the subclass (e.g., lambda closure types).
llvm::DenseMap<Decl *, Decl *> TransformedLocalDecls;
public:
/// Initializes a new tree transformer.
TreeTransform(Sema &SemaRef) : SemaRef(SemaRef) { }
/// Retrieves a reference to the derived class.
Derived &getDerived() { return static_cast<Derived&>(*this); }
/// Retrieves a reference to the derived class.
const Derived &getDerived() const {
return static_cast<const Derived&>(*this);
}
static inline ExprResult Owned(Expr *E) { return E; }
static inline StmtResult Owned(Stmt *S) { return S; }
/// Retrieves a reference to the semantic analysis object used for
/// this tree transform.
Sema &getSema() const { return SemaRef; }
/// Whether the transformation should always rebuild AST nodes, even
/// if none of the children have changed.
///
/// Subclasses may override this function to specify when the transformation
/// should rebuild all AST nodes.
///
/// We must always rebuild all AST nodes when performing variadic template
/// pack expansion, in order to avoid violating the AST invariant that each
/// statement node appears at most once in its containing declaration.
bool AlwaysRebuild() { return SemaRef.ArgumentPackSubstitutionIndex != -1; }
/// Whether the transformation is forming an expression or statement that
/// replaces the original. In this case, we'll reuse mangling numbers from
/// existing lambdas.
bool ReplacingOriginal() { return false; }
/// Whether CXXConstructExprs can be skipped when they are implicit.
/// They will be reconstructed when used if needed.
/// This is useful when the user that causes rebuilding of the
/// CXXConstructExpr is outside of the expression at which the TreeTransform
/// started.
bool AllowSkippingCXXConstructExpr() { return true; }
/// Returns the location of the entity being transformed, if that
/// information was not available elsewhere in the AST.
///
/// By default, returns no source-location information. Subclasses can
/// provide an alternative implementation that provides better location
/// information.
SourceLocation getBaseLocation() { return SourceLocation(); }
/// Returns the name of the entity being transformed, if that
/// information was not available elsewhere in the AST.
///
/// By default, returns an empty name. Subclasses can provide an alternative
/// implementation with a more precise name.
DeclarationName getBaseEntity() { return DeclarationName(); }
/// Sets the "base" location and entity when that
/// information is known based on another transformation.
///
/// By default, the source location and entity are ignored. Subclasses can
/// override this function to provide a customized implementation.
void setBase(SourceLocation Loc, DeclarationName Entity) { }
/// RAII object that temporarily sets the base location and entity
/// used for reporting diagnostics in types.
class TemporaryBase {
TreeTransform &Self;
SourceLocation OldLocation;
DeclarationName OldEntity;
public:
TemporaryBase(TreeTransform &Self, SourceLocation Location,
DeclarationName Entity) : Self(Self) {
OldLocation = Self.getDerived().getBaseLocation();
OldEntity = Self.getDerived().getBaseEntity();
if (Location.isValid())
Self.getDerived().setBase(Location, Entity);
}
~TemporaryBase() {
Self.getDerived().setBase(OldLocation, OldEntity);
}
};
/// Determine whether the given type \p T has already been
/// transformed.
///
/// Subclasses can provide an alternative implementation of this routine
/// to short-circuit evaluation when it is known that a given type will
/// not change. For example, template instantiation need not traverse
/// non-dependent types.
bool AlreadyTransformed(QualType T) {
return T.isNull();
}
/// Transform a template parameter depth level.
///
/// During a transformation that transforms template parameters, this maps
/// an old template parameter depth to a new depth.
unsigned TransformTemplateDepth(unsigned Depth) {
return Depth;
}
/// Determine whether the given call argument should be dropped, e.g.,
/// because it is a default argument.
///
/// Subclasses can provide an alternative implementation of this routine to
/// determine which kinds of call arguments get dropped. By default,
/// CXXDefaultArgument nodes are dropped (prior to transformation).
bool DropCallArgument(Expr *E) {
return E->isDefaultArgument();
}
/// Determine whether we should expand a pack expansion with the
/// given set of parameter packs into separate arguments by repeatedly
/// transforming the pattern.
///
/// By default, the transformer never tries to expand pack expansions.
/// Subclasses can override this routine to provide different behavior.
///
/// \param EllipsisLoc The location of the ellipsis that identifies the
/// pack expansion.
///
/// \param PatternRange The source range that covers the entire pattern of
/// the pack expansion.
///
/// \param Unexpanded The set of unexpanded parameter packs within the
/// pattern.
///
/// \param ShouldExpand Will be set to \c true if the transformer should
/// expand the corresponding pack expansions into separate arguments. When
/// set, \c NumExpansions must also be set.
///
/// \param RetainExpansion Whether the caller should add an unexpanded
/// pack expansion after all of the expanded arguments. This is used
/// when extending explicitly-specified template argument packs per
/// C++0x [temp.arg.explicit]p9.
///
/// \param NumExpansions The number of separate arguments that will be in
/// the expanded form of the corresponding pack expansion. This is both an
/// input and an output parameter, which can be set by the caller if the
/// number of expansions is known a priori (e.g., due to a prior substitution)
/// and will be set by the callee when the number of expansions is known.
/// The callee must set this value when \c ShouldExpand is \c true; it may
/// set this value in other cases.
///
/// \returns true if an error occurred (e.g., because the parameter packs
/// are to be instantiated with arguments of different lengths), false
/// otherwise. If false, \c ShouldExpand (and possibly \c NumExpansions)
/// must be set.
bool TryExpandParameterPacks(SourceLocation EllipsisLoc,
SourceRange PatternRange,
ArrayRef<UnexpandedParameterPack> Unexpanded,
bool &ShouldExpand,
bool &RetainExpansion,
Optional<unsigned> &NumExpansions) {
ShouldExpand = false;
return false;
}
/// "Forget" about the partially-substituted pack template argument,
/// when performing an instantiation that must preserve the parameter pack
/// use.
///
/// This routine is meant to be overridden by the template instantiator.
TemplateArgument ForgetPartiallySubstitutedPack() {
return TemplateArgument();
}
/// "Remember" the partially-substituted pack template argument
/// after performing an instantiation that must preserve the parameter pack
/// use.
///
/// This routine is meant to be overridden by the template instantiator.
void RememberPartiallySubstitutedPack(TemplateArgument Arg) { }
/// Note to the derived class when a function parameter pack is
/// being expanded.
void ExpandingFunctionParameterPack(ParmVarDecl *Pack) { }
/// Transforms the given type into another type.
///
/// By default, this routine transforms a type by creating a
/// TypeSourceInfo for it and delegating to the appropriate
/// function. This is expensive, but we don't mind, because
/// this method is deprecated anyway; all users should be
/// switched to storing TypeSourceInfos.
///
/// \returns the transformed type.
QualType TransformType(QualType T);
/// Transforms the given type-with-location into a new
/// type-with-location.
///
/// By default, this routine transforms a type by delegating to the
/// appropriate TransformXXXType to build a new type. Subclasses
/// may override this function (to take over all type
/// transformations) or some set of the TransformXXXType functions
/// to alter the transformation.
TypeSourceInfo *TransformType(TypeSourceInfo *DI);
/// Transform the given type-with-location into a new
/// type, collecting location information in the given builder
/// as necessary.
///
QualType TransformType(TypeLocBuilder &TLB, TypeLoc TL);
/// Transform a type that is permitted to produce a
/// DeducedTemplateSpecializationType.
///
/// This is used in the (relatively rare) contexts where it is acceptable
/// for transformation to produce a class template type with deduced
/// template arguments.
/// @{
QualType TransformTypeWithDeducedTST(QualType T);
TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *DI);
/// @}
/// The reason why the value of a statement is not discarded, if any.
enum StmtDiscardKind {
SDK_Discarded,
SDK_NotDiscarded,
SDK_StmtExprResult,
};
/// Transform the given statement.
///
/// By default, this routine transforms a statement by delegating to the
/// appropriate TransformXXXStmt function to transform a specific kind of
/// statement or the TransformExpr() function to transform an expression.
/// Subclasses may override this function to transform statements using some
/// other mechanism.
///
/// \returns the transformed statement.
StmtResult TransformStmt(Stmt *S, StmtDiscardKind SDK = SDK_Discarded);
/// Transform the given OpenMP clause.
///
/// By default, this routine transforms a clause by delegating to the
/// appropriate TransformOMPXXXClause function to transform a specific kind
/// of clause. Subclasses may override this function to transform clauses
/// using some other mechanism.
///
/// \returns the transformed OpenMP clause.
OMPClause *TransformOMPClause(OMPClause *S);
/// Transform the given attribute.
///
/// By default, this routine transforms an attribute by delegating to the
/// appropriate TransformXXXAttr function to transform a specific kind
/// of attribute. Subclasses may override this function to transform
/// attributed statements using some other mechanism.
///
/// \returns the transformed attribute.
const Attr *TransformAttr(const Attr *S);
/// Transform the specified attribute.
///
/// Subclasses should override the transformation of attributes with a pragma
/// spelling to transform expressions stored within the attribute.
///
/// \returns the transformed attribute.
#define ATTR(X)
#define PRAGMA_SPELLING_ATTR(X) \
const X##Attr *Transform##X##Attr(const X##Attr *R) { return R; }
#include "clang/Basic/AttrList.inc"
/// Transform the given expression.
///
/// By default, this routine transforms an expression by delegating to the
/// appropriate TransformXXXExpr function to build a new expression.
/// Subclasses may override this function to transform expressions using some
/// other mechanism.
///
/// \returns the transformed expression.
ExprResult TransformExpr(Expr *E);
/// Transform the given initializer.
///
/// By default, this routine transforms an initializer by stripping off the
/// semantic nodes added by initialization, then passing the result to
/// TransformExpr or TransformExprs.
///
/// \returns the transformed initializer.
ExprResult TransformInitializer(Expr *Init, bool NotCopyInit);
/// Transform the given list of expressions.
///
/// This routine transforms a list of expressions by invoking
/// \c TransformExpr() for each subexpression. However, it also provides
/// support for variadic templates by expanding any pack expansions (if the
/// derived class permits such expansion) along the way. When pack expansions
/// are present, the number of outputs may not equal the number of inputs.
///
/// \param Inputs The set of expressions to be transformed.
///
/// \param NumInputs The number of expressions in \c Inputs.
///
/// \param IsCall If \c true, then this transform is being performed on
/// function-call arguments, and any arguments that \c DropCallArgument()
/// says should be dropped will be dropped.
///
/// \param Outputs The transformed input expressions will be added to this
/// vector.
///
/// \param ArgChanged If non-NULL, will be set \c true if any argument changed
/// due to transformation.
///
/// \returns true if an error occurred, false otherwise.
bool TransformExprs(Expr *const *Inputs, unsigned NumInputs, bool IsCall,
SmallVectorImpl<Expr *> &Outputs,
bool *ArgChanged = nullptr);
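// A minimal usage sketch, mirroring how call expressions are typically
// rebuilt: transform the arguments of a call, letting the derived class drop
// default arguments and expand packs along the way. Here 'E' stands for a
// hypothetical CallExpr* available in the calling transform.
//
//   SmallVector<Expr *, 8> Args;
//   bool ArgChanged = false;
//   if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(),
//                                   /*IsCall=*/true, Args, &ArgChanged))
//     return ExprError();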
/// Transform the given declaration, which is referenced from a type
/// or expression.
///
/// By default, acts as the identity function on declarations, unless the
/// transformer has had to transform the declaration itself. Subclasses
/// may override this function to provide alternate behavior.
Decl *TransformDecl(SourceLocation Loc, Decl *D) {
llvm::DenseMap<Decl *, Decl *>::iterator Known
= TransformedLocalDecls.find(D);
if (Known != TransformedLocalDecls.end())
return Known->second;
return D;
}
/// Transform the specified condition.
///
/// By default, this transforms the variable and expression and rebuilds
/// the condition.
Sema::ConditionResult TransformCondition(SourceLocation Loc, VarDecl *Var,
Expr *Expr,
Sema::ConditionKind Kind);
/// Transform the attributes associated with the given declaration and
/// place them on the new declaration.
///
/// By default, this operation does nothing. Subclasses may override this
/// behavior to transform attributes.
void transformAttrs(Decl *Old, Decl *New) { }
/// Note that a local declaration has been transformed by this
/// transformer.
///
/// Local declarations are typically transformed via a call to
/// TransformDefinition. However, in some cases (e.g., lambda expressions),
/// the transformer itself has to transform the declarations. This routine
/// can be overridden by a subclass that keeps track of such mappings.
void transformedLocalDecl(Decl *Old, ArrayRef<Decl *> New) {
assert(New.size() == 1 &&
"must override transformedLocalDecl if performing pack expansion");
TransformedLocalDecls[Old] = New.front();
}
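// A minimal usage sketch of the interplay with TransformDecl() above: a
// derived transformer that rewrites a local declaration records the mapping
// first, so that later references pick up the replacement. OldVD and NewVD
// are hypothetical VarDecl pointers produced elsewhere by the subclass.
//
//   transformedLocalDecl(OldVD, {NewVD});
//   // ... later, while transforming an expression that refers to OldVD:
//   Decl *D = getDerived().TransformDecl(Loc, OldVD);  // yields NewVD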
/// Transform the definition of the given declaration.
///
/// By default, invokes TransformDecl() to transform the declaration.
/// Subclasses may override this function to provide alternate behavior.
Decl *TransformDefinition(SourceLocation Loc, Decl *D) {
return getDerived().TransformDecl(Loc, D);
}
/// Transform the given declaration, which was the first part of a
/// nested-name-specifier in a member access expression.
///
/// This specific declaration transformation only applies to the first
/// identifier in a nested-name-specifier of a member access expression, e.g.,
/// the \c T in \c x->T::member
///
/// By default, invokes TransformDecl() to transform the declaration.
/// Subclasses may override this function to provide alternate behavior.
NamedDecl *TransformFirstQualifierInScope(NamedDecl *D, SourceLocation Loc) {
return cast_or_null<NamedDecl>(getDerived().TransformDecl(Loc, D));
}
/// Transform the set of declarations in an OverloadExpr.
bool TransformOverloadExprDecls(OverloadExpr *Old, bool RequiresADL,
LookupResult &R);
/// Transform the given nested-name-specifier with source-location
/// information.
///
/// By default, transforms all of the types and declarations within the
/// nested-name-specifier. Subclasses may override this function to provide
/// alternate behavior.
NestedNameSpecifierLoc
TransformNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
QualType ObjectType = QualType(),
NamedDecl *FirstQualifierInScope = nullptr);
/// Transform the given declaration name.
///
/// By default, transforms the types of conversion function, constructor,
/// and destructor names and then (if needed) rebuilds the declaration name.
/// Identifiers and selectors are returned unmodified. Subclasses may
/// override this function to provide alternate behavior.
DeclarationNameInfo
TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo);
bool TransformRequiresExprRequirements(ArrayRef<concepts::Requirement *> Reqs,
llvm::SmallVectorImpl<concepts::Requirement *> &Transformed);
concepts::TypeRequirement *
TransformTypeRequirement(concepts::TypeRequirement *Req);
concepts::ExprRequirement *
TransformExprRequirement(concepts::ExprRequirement *Req);
concepts::NestedRequirement *
TransformNestedRequirement(concepts::NestedRequirement *Req);
/// Transform the given template name.
///
/// \param SS The nested-name-specifier that qualifies the template
/// name. This nested-name-specifier must already have been transformed.
///
/// \param Name The template name to transform.
///
/// \param NameLoc The source location of the template name.
///
/// \param ObjectType If we're translating a template name within a member
/// access expression, this is the type of the object whose member template
/// is being referenced.
///
/// \param FirstQualifierInScope If the first part of a nested-name-specifier
/// also refers to a name within the current (lexical) scope, this is the
/// declaration it refers to.
///
/// By default, transforms the template name by transforming the declarations
/// and nested-name-specifiers that occur within the template name.
/// Subclasses may override this function to provide alternate behavior.
TemplateName
TransformTemplateName(CXXScopeSpec &SS, TemplateName Name,
SourceLocation NameLoc,
QualType ObjectType = QualType(),
NamedDecl *FirstQualifierInScope = nullptr,
bool AllowInjectedClassName = false);
/// Transform the given template argument.
///
/// By default, this operation transforms the type, expression, or
/// declaration stored within the template argument and constructs a
/// new template argument from the transformed result. Subclasses may
/// override this function to provide alternate behavior.
///
/// Returns true if there was an error.
bool TransformTemplateArgument(const TemplateArgumentLoc &Input,
TemplateArgumentLoc &Output,
bool Uneval = false);
/// Transform the given set of template arguments.
///
/// By default, this operation transforms all of the template arguments
/// in the input set using \c TransformTemplateArgument(), and appends
/// the transformed arguments to the output list.
///
/// Note that this overload of \c TransformTemplateArguments() is merely
/// a convenience function. Subclasses that wish to override this behavior
/// should override the iterator-based member template version.
///
/// \param Inputs The set of template arguments to be transformed.
///
/// \param NumInputs The number of template arguments in \p Inputs.
///
/// \param Outputs The set of transformed template arguments output by this
/// routine.
///
/// Returns true if an error occurred.
bool TransformTemplateArguments(const TemplateArgumentLoc *Inputs,
unsigned NumInputs,
TemplateArgumentListInfo &Outputs,
bool Uneval = false) {
return TransformTemplateArguments(Inputs, Inputs + NumInputs, Outputs,
Uneval);
}
/// Transform the given set of template arguments.
///
/// By default, this operation transforms all of the template arguments
/// in the input set using \c TransformTemplateArgument(), and appends
/// the transformed arguments to the output list.
///
/// \param First An iterator to the first template argument.
///
/// \param Last An iterator one step past the last template argument.
///
/// \param Outputs The set of transformed template arguments output by this
/// routine.
///
/// Returns true if an error occurred.
template<typename InputIterator>
bool TransformTemplateArguments(InputIterator First,
InputIterator Last,
TemplateArgumentListInfo &Outputs,
bool Uneval = false);
/// Fakes up a TemplateArgumentLoc for a given TemplateArgument.
void InventTemplateArgumentLoc(const TemplateArgument &Arg,
TemplateArgumentLoc &ArgLoc);
/// Fakes up a TypeSourceInfo for a type.
TypeSourceInfo *InventTypeSourceInfo(QualType T) {
return SemaRef.Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
}
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
QualType Transform##CLASS##Type(TypeLocBuilder &TLB, CLASS##TypeLoc T);
#include "clang/AST/TypeLocNodes.def"
template<typename Fn>
QualType TransformFunctionProtoType(TypeLocBuilder &TLB,
FunctionProtoTypeLoc TL,
CXXRecordDecl *ThisContext,
Qualifiers ThisTypeQuals,
Fn TransformExceptionSpec);
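// A minimal usage sketch: callers generally pass the exception-spec transform
// as a lambda. ExceptionStorage is a hypothetical SmallVector<QualType, 4>
// that must outlive the call.
//
//   getDerived().TransformFunctionProtoType(
//       TLB, TL, /*ThisContext=*/nullptr, Qualifiers(),
//       [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
//         return getDerived().TransformExceptionSpec(TL.getBeginLoc(), ESI,
//                                                    ExceptionStorage, Changed);
//       });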
bool TransformExceptionSpec(SourceLocation Loc,
FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions,
bool &Changed);
StmtResult TransformSEHHandler(Stmt *Handler);
QualType
TransformTemplateSpecializationType(TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL,
TemplateName Template);
QualType
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
TemplateName Template,
CXXScopeSpec &SS);
QualType TransformDependentTemplateSpecializationType(
TypeLocBuilder &TLB, DependentTemplateSpecializationTypeLoc TL,
NestedNameSpecifierLoc QualifierLoc);
/// Transforms the parameters of a function type into the
/// given vectors.
///
/// The result vectors should be kept in sync; null entries in the
/// variables vector are acceptable.
///
/// Return true on error.
bool TransformFunctionTypeParams(
SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
const QualType *ParamTypes,
const FunctionProtoType::ExtParameterInfo *ParamInfos,
SmallVectorImpl<QualType> &PTypes, SmallVectorImpl<ParmVarDecl *> *PVars,
Sema::ExtParameterInfoBuilder &PInfos);
/// Transforms a single function-type parameter. Return null
/// on error.
///
/// \param indexAdjustment - A number to add to the parameter's
/// scope index; can be negative
ParmVarDecl *TransformFunctionTypeParam(ParmVarDecl *OldParm,
int indexAdjustment,
Optional<unsigned> NumExpansions,
bool ExpectParameterPack);
/// Transform the body of a lambda-expression.
StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body);
/// Alternative implementation of TransformLambdaBody that skips transforming
/// the body.
StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body);
QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
StmtResult TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr);
ExprResult TransformCXXNamedCastExpr(CXXNamedCastExpr *E);
TemplateParameterList *TransformTemplateParameterList(
TemplateParameterList *TPL) {
return TPL;
}
ExprResult TransformAddressOfOperand(Expr *E);
ExprResult TransformDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E,
bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI);
ExprResult TransformParenDependentScopeDeclRefExpr(
ParenExpr *PE, DependentScopeDeclRefExpr *DRE, bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI);
StmtResult TransformOMPExecutableDirective(OMPExecutableDirective *S);
// FIXME: We use LLVM_ATTRIBUTE_NOINLINE because inlining causes a ridiculous
// amount of stack usage with clang.
#define STMT(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
StmtResult Transform##Node(Node *S);
#define VALUESTMT(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
StmtResult Transform##Node(Node *S, StmtDiscardKind SDK);
#define EXPR(Node, Parent) \
LLVM_ATTRIBUTE_NOINLINE \
ExprResult Transform##Node(Node *E);
#define ABSTRACT_STMT(Stmt)
#include "clang/AST/StmtNodes.inc"
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) \
LLVM_ATTRIBUTE_NOINLINE \
OMPClause *Transform##Class(Class *S);
#include "llvm/Frontend/OpenMP/OMP.inc"
/// Build a new qualified type given its unqualified type and type location.
///
/// By default, this routine adds type qualifiers only to types that can
/// have qualifiers, and silently suppresses those qualifiers that are not
/// permitted. Subclasses may override this routine to provide different
/// behavior.
QualType RebuildQualifiedType(QualType T, QualifiedTypeLoc TL);
/// Build a new pointer type given its pointee type.
///
/// By default, performs semantic analysis when building the pointer type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildPointerType(QualType PointeeType, SourceLocation Sigil);
/// Build a new block pointer type given its pointee type.
///
/// By default, performs semantic analysis when building the block pointer
/// type. Subclasses may override this routine to provide different behavior.
QualType RebuildBlockPointerType(QualType PointeeType, SourceLocation Sigil);
/// Build a new reference type given the type it references.
///
/// By default, performs semantic analysis when building the
/// reference type. Subclasses may override this routine to provide
/// different behavior.
///
/// \param LValue whether the type was written with an lvalue sigil
/// or an rvalue sigil.
QualType RebuildReferenceType(QualType ReferentType,
bool LValue,
SourceLocation Sigil);
/// Build a new member pointer type given the pointee type and the
/// class type it refers into.
///
/// By default, performs semantic analysis when building the member pointer
/// type. Subclasses may override this routine to provide different behavior.
QualType RebuildMemberPointerType(QualType PointeeType, QualType ClassType,
SourceLocation Sigil);
QualType RebuildObjCTypeParamType(const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc);
/// Build an Objective-C object type.
///
/// By default, performs semantic analysis when building the object type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildObjCObjectType(QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc);
/// Build a new Objective-C object pointer type given the pointee type.
///
/// By default, directly builds the pointer type, with no additional semantic
/// analysis.
QualType RebuildObjCObjectPointerType(QualType PointeeType,
SourceLocation Star);
/// Build a new array type given the element type, size
/// modifier, size of the array (if known), size expression, and index type
/// qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
/// Also by default, all of the other Rebuild*Array routines delegate to
/// this routine to build the array type.
QualType RebuildArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt *Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new constant array type given the element type, size
/// modifier, (known) size of the array, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildConstantArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt &Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new incomplete array type given the element type, size
/// modifier, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildIncompleteArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new variable-length array type given the element type,
/// size modifier, size expression, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildVariableArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new dependent-sized array type given the element type,
/// size modifier, size expression, and index type qualifiers.
///
/// By default, performs semantic analysis when building the array type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentSizedArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange);
/// Build a new vector type given the element type and
/// number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildVectorType(QualType ElementType, unsigned NumElements,
VectorType::VectorKind VecKind);
/// Build a new potentially dependently-sized vector type
/// given the element type and number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentVectorType(QualType ElementType, Expr *SizeExpr,
SourceLocation AttributeLoc,
VectorType::VectorKind);
/// Build a new extended vector type given the element type and
/// number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildExtVectorType(QualType ElementType, unsigned NumElements,
SourceLocation AttributeLoc);
/// Build a new potentially dependently-sized extended vector type
/// given the element type and number of elements.
///
/// By default, performs semantic analysis when building the vector type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDependentSizedExtVectorType(QualType ElementType,
Expr *SizeExpr,
SourceLocation AttributeLoc);
/// Build a new matrix type given the element type and dimensions.
QualType RebuildConstantMatrixType(QualType ElementType, unsigned NumRows,
unsigned NumColumns);
/// Build a new matrix type given the type and dependently-defined
/// dimensions.
QualType RebuildDependentSizedMatrixType(QualType ElementType, Expr *RowExpr,
Expr *ColumnExpr,
SourceLocation AttributeLoc);
/// Build a new DependentAddressSpaceType or return the pointee
/// type variable with the correct address space (retrieved from
/// AddrSpaceExpr) applied to it. The former will be returned in cases
/// where the address space remains dependent.
///
/// By default, performs semantic analysis when building the type with address
/// space applied. Subclasses may override this routine to provide different
/// behavior.
QualType RebuildDependentAddressSpaceType(QualType PointeeType,
Expr *AddrSpaceExpr,
SourceLocation AttributeLoc);
/// Build a new function type.
///
/// By default, performs semantic analysis when building the function type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildFunctionProtoType(QualType T,
MutableArrayRef<QualType> ParamTypes,
const FunctionProtoType::ExtProtoInfo &EPI);
/// Build a new unprototyped function type.
QualType RebuildFunctionNoProtoType(QualType ResultType);
/// Rebuild an unresolved typename type, given the decl that
/// the UnresolvedUsingTypenameDecl was transformed to.
QualType RebuildUnresolvedUsingType(SourceLocation NameLoc, Decl *D);
/// Build a new typedef type.
QualType RebuildTypedefType(TypedefNameDecl *Typedef) {
return SemaRef.Context.getTypeDeclType(Typedef);
}
/// Build a new MacroQualified type.
QualType RebuildMacroQualifiedType(QualType T,
const IdentifierInfo *MacroII) {
return SemaRef.Context.getMacroQualifiedType(T, MacroII);
}
/// Build a new class/struct/union type.
QualType RebuildRecordType(RecordDecl *Record) {
return SemaRef.Context.getTypeDeclType(Record);
}
/// Build a new Enum type.
QualType RebuildEnumType(EnumDecl *Enum) {
return SemaRef.Context.getTypeDeclType(Enum);
}
/// Build a new typeof(expr) type.
///
/// By default, performs semantic analysis when building the typeof type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildTypeOfExprType(Expr *Underlying, SourceLocation Loc);
/// Build a new typeof(type) type.
///
/// By default, builds a new TypeOfType with the given underlying type.
QualType RebuildTypeOfType(QualType Underlying);
/// Build a new unary transform type.
QualType RebuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc);
/// Build a new C++11 decltype type.
///
/// By default, performs semantic analysis when building the decltype type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildDecltypeType(Expr *Underlying, SourceLocation Loc);
/// Build a new C++11 auto type.
///
/// By default, builds a new AutoType with the given deduced type.
QualType RebuildAutoType(QualType Deduced, AutoTypeKeyword Keyword,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs) {
// Note, IsDependent is always false here: we implicitly convert an 'auto'
// which has been deduced to a dependent type into an undeduced 'auto', so
// that we'll retry deduction after the transformation.
return SemaRef.Context.getAutoType(Deduced, Keyword,
/*IsDependent*/ false, /*IsPack=*/false,
TypeConstraintConcept,
TypeConstraintArgs);
}
/// By default, builds a new DeducedTemplateSpecializationType with the given
/// deduced type.
QualType RebuildDeducedTemplateSpecializationType(TemplateName Template,
QualType Deduced) {
return SemaRef.Context.getDeducedTemplateSpecializationType(
Template, Deduced, /*IsDependent*/ false);
}
/// Build a new template specialization type.
///
/// By default, performs semantic analysis when building the template
/// specialization type. Subclasses may override this routine to provide
/// different behavior.
QualType RebuildTemplateSpecializationType(TemplateName Template,
SourceLocation TemplateLoc,
TemplateArgumentListInfo &Args);
/// Build a new parenthesized type.
///
/// By default, builds a new ParenType type from the inner type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildParenType(QualType InnerType) {
return SemaRef.BuildParenType(InnerType);
}
/// Build a new qualified name type.
///
/// By default, builds a new ElaboratedType type from the keyword,
/// the nested-name-specifier and the named type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildElaboratedType(SourceLocation KeywordLoc,
ElaboratedTypeKeyword Keyword,
NestedNameSpecifierLoc QualifierLoc,
QualType Named) {
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Named);
}
/// Build a new typename type that refers to a template-id.
///
/// By default, builds a new DependentNameType type from the
/// nested-name-specifier and the given type. Subclasses may override
/// this routine to provide different behavior.
QualType RebuildDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const IdentifierInfo *Name,
SourceLocation NameLoc,
TemplateArgumentListInfo &Args,
bool AllowInjectedClassName) {
// Rebuild the template name.
// TODO: avoid TemplateName abstraction
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
TemplateName InstName = getDerived().RebuildTemplateName(
SS, TemplateKWLoc, *Name, NameLoc, QualType(), nullptr,
AllowInjectedClassName);
if (InstName.isNull())
return QualType();
// If it's still dependent, make a dependent specialization.
if (InstName.getAsDependentTemplateName())
return SemaRef.Context.getDependentTemplateSpecializationType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Name,
Args);
// Otherwise, make an elaborated type wrapping a non-dependent
// specialization.
QualType T =
getDerived().RebuildTemplateSpecializationType(InstName, NameLoc, Args);
if (T.isNull()) return QualType();
if (Keyword == ETK_None && QualifierLoc.getNestedNameSpecifier() == nullptr)
return T;
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
T);
}
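// A worked example of the branches above, for a hypothetical input: when
// rebuilding 'typename T::template Inner<int>', a still-dependent template
// name takes the DependentTemplateSpecializationType branch; once T has been
// substituted with a concrete class, the non-dependent branch builds
// Inner<int> and wraps it in an ElaboratedType unless there is no keyword
// and no qualifier left.
//
//   template <typename T> struct Use {
//     typename T::template Inner<int> Member;  // rebuilt through here
//   };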
/// Build a new typename type that refers to an identifier.
///
/// By default, performs semantic analysis when building the typename type
/// (or elaborated type). Subclasses may override this routine to provide
/// different behavior.
QualType RebuildDependentNameType(ElaboratedTypeKeyword Keyword,
SourceLocation KeywordLoc,
NestedNameSpecifierLoc QualifierLoc,
const IdentifierInfo *Id,
SourceLocation IdLoc,
bool DeducedTSTContext) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (QualifierLoc.getNestedNameSpecifier()->isDependent()) {
// If the name is still dependent, just build a new dependent name type.
if (!SemaRef.computeDeclContext(SS))
return SemaRef.Context.getDependentNameType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
Id);
}
if (Keyword == ETK_None || Keyword == ETK_Typename) {
return SemaRef.CheckTypenameType(Keyword, KeywordLoc, QualifierLoc,
*Id, IdLoc, DeducedTSTContext);
}
TagTypeKind Kind = TypeWithKeyword::getTagTypeKindForKeyword(Keyword);
// We had a dependent elaborated-type-specifier that has been transformed
// into a non-dependent elaborated-type-specifier. Find the tag we're
// referring to.
LookupResult Result(SemaRef, Id, IdLoc, Sema::LookupTagName);
DeclContext *DC = SemaRef.computeDeclContext(SS, false);
if (!DC)
return QualType();
if (SemaRef.RequireCompleteDeclContext(SS, DC))
return QualType();
TagDecl *Tag = nullptr;
SemaRef.LookupQualifiedName(Result, DC);
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
break;
case LookupResult::Found:
Tag = Result.getAsSingle<TagDecl>();
break;
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
llvm_unreachable("Tag lookup cannot find non-tags");
case LookupResult::Ambiguous:
// Let the LookupResult structure handle ambiguities.
return QualType();
}
if (!Tag) {
// Check where the name exists but isn't a tag type and use that to emit
// better diagnostics.
LookupResult Result(SemaRef, Id, IdLoc, Sema::LookupTagName);
SemaRef.LookupQualifiedName(Result, DC);
switch (Result.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue: {
NamedDecl *SomeDecl = Result.getRepresentativeDecl();
Sema::NonTagKind NTK = SemaRef.getNonTagTypeDeclKind(SomeDecl, Kind);
SemaRef.Diag(IdLoc, diag::err_tag_reference_non_tag) << SomeDecl
<< NTK << Kind;
SemaRef.Diag(SomeDecl->getLocation(), diag::note_declared_at);
break;
}
default:
SemaRef.Diag(IdLoc, diag::err_not_tag_in_scope)
<< Kind << Id << DC << QualifierLoc.getSourceRange();
break;
}
return QualType();
}
if (!SemaRef.isAcceptableTagRedeclaration(Tag, Kind, /*isDefinition*/false,
IdLoc, Id)) {
SemaRef.Diag(KeywordLoc, diag::err_use_with_wrong_tag) << Id;
SemaRef.Diag(Tag->getLocation(), diag::note_previous_use);
return QualType();
}
// Build the elaborated-type-specifier type.
QualType T = SemaRef.Context.getTypeDeclType(Tag);
return SemaRef.Context.getElaboratedType(Keyword,
QualifierLoc.getNestedNameSpecifier(),
T);
}
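// A worked example of the lookup above, for a hypothetical input: given
// 'struct T::Node *Head;' inside a template, once T is substituted with a
// concrete class the elaborated-type-specifier is no longer dependent, so
// tag lookup must find a struct named 'Node' in T; a non-tag result triggers
// err_tag_reference_non_tag and a missing name triggers err_not_tag_in_scope,
// as emitted above.
//
//   template <typename T> struct List {
//     struct T::Node *Head;  // rebuilt through RebuildDependentNameType
//   };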
/// Build a new pack expansion type.
///
/// By default, builds a new PackExpansionType type from the given pattern.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildPackExpansionType(QualType Pattern,
SourceRange PatternRange,
SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
return getSema().CheckPackExpansion(Pattern, PatternRange, EllipsisLoc,
NumExpansions);
}
/// Build a new atomic type given its value type.
///
/// By default, performs semantic analysis when building the atomic type.
/// Subclasses may override this routine to provide different behavior.
QualType RebuildAtomicType(QualType ValueType, SourceLocation KWLoc);
/// Build a new pipe type given its value type.
QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc,
bool isReadPipe);
/// Build a new extended integer type given its signedness and bit width.
QualType RebuildExtIntType(bool IsUnsigned, unsigned NumBits,
SourceLocation Loc);
/// Build a new dependent extended integer type given its signedness and a
/// dependent bit-width expression.
QualType RebuildDependentExtIntType(bool IsUnsigned, Expr *NumBitsExpr,
SourceLocation Loc);
/// Build a new template name given a nested name specifier, a flag
/// indicating whether the "template" keyword was provided, and the template
/// that the template name refers to.
///
/// By default, builds the new template name directly. Subclasses may override
/// this routine to provide different behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
bool TemplateKW,
TemplateDecl *Template);
/// Build a new template name given a nested name specifier and the
/// name that is referred to as a template.
///
/// By default, performs semantic analysis to determine whether the name can
/// be resolved to a specific template, then builds the appropriate kind of
/// template name. Subclasses may override this routine to provide different
/// behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
const IdentifierInfo &Name,
SourceLocation NameLoc, QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName);
/// Build a new template name given a nested name specifier and the
/// overloaded operator name that is referred to as a template.
///
/// By default, performs semantic analysis to determine whether the name can
/// be resolved to a specific template, then builds the appropriate kind of
/// template name. Subclasses may override this routine to provide different
/// behavior.
TemplateName RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
OverloadedOperatorKind Operator,
SourceLocation NameLoc, QualType ObjectType,
bool AllowInjectedClassName);
/// Build a new template name given a template template parameter pack
/// and the argument pack that it has been substituted with.
///
/// By default, builds the template name directly as a substituted template
/// template parameter pack. Subclasses may override this routine to provide
/// different behavior.
TemplateName RebuildTemplateName(TemplateTemplateParmDecl *Param,
const TemplateArgument &ArgPack) {
return getSema().Context.getSubstTemplateTemplateParmPack(Param, ArgPack);
}
/// Build a new compound statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCompoundStmt(SourceLocation LBraceLoc,
MultiStmtArg Statements,
SourceLocation RBraceLoc,
bool IsStmtExpr) {
return getSema().ActOnCompoundStmt(LBraceLoc, RBraceLoc, Statements,
IsStmtExpr);
}
/// Build a new case statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCaseStmt(SourceLocation CaseLoc,
Expr *LHS,
SourceLocation EllipsisLoc,
Expr *RHS,
SourceLocation ColonLoc) {
return getSema().ActOnCaseStmt(CaseLoc, LHS, EllipsisLoc, RHS,
ColonLoc);
}
/// Attach the body to a new case statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCaseStmtBody(Stmt *S, Stmt *Body) {
getSema().ActOnCaseStmtBody(S, Body);
return S;
}
/// Build a new default statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDefaultStmt(SourceLocation DefaultLoc,
SourceLocation ColonLoc,
Stmt *SubStmt) {
return getSema().ActOnDefaultStmt(DefaultLoc, ColonLoc, SubStmt,
/*CurScope=*/nullptr);
}
/// Build a new label statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildLabelStmt(SourceLocation IdentLoc, LabelDecl *L,
SourceLocation ColonLoc, Stmt *SubStmt) {
return SemaRef.ActOnLabelStmt(IdentLoc, L, ColonLoc, SubStmt);
}
/// Build a new attributed statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildAttributedStmt(SourceLocation AttrLoc,
ArrayRef<const Attr *> Attrs,
Stmt *SubStmt) {
return SemaRef.BuildAttributedStmt(AttrLoc, Attrs, SubStmt);
}
/// Build a new "if" statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
SourceLocation LParenLoc, Sema::ConditionResult Cond,
SourceLocation RParenLoc, Stmt *Init, Stmt *Then,
SourceLocation ElseLoc, Stmt *Else) {
return getSema().ActOnIfStmt(IfLoc, IsConstexpr, LParenLoc, Init, Cond,
RParenLoc, Then, ElseLoc, Else);
}
/// Start building a new switch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildSwitchStmtStart(SourceLocation SwitchLoc,
SourceLocation LParenLoc, Stmt *Init,
Sema::ConditionResult Cond,
SourceLocation RParenLoc) {
return getSema().ActOnStartOfSwitchStmt(SwitchLoc, LParenLoc, Init, Cond,
RParenLoc);
}
/// Attach the body to the switch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildSwitchStmtBody(SourceLocation SwitchLoc,
Stmt *Switch, Stmt *Body) {
return getSema().ActOnFinishSwitchStmt(SwitchLoc, Switch, Body);
}
/// Build a new while statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildWhileStmt(SourceLocation WhileLoc, SourceLocation LParenLoc,
Sema::ConditionResult Cond,
SourceLocation RParenLoc, Stmt *Body) {
return getSema().ActOnWhileStmt(WhileLoc, LParenLoc, Cond, RParenLoc, Body);
}
/// Build a new do-while statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDoStmt(SourceLocation DoLoc, Stmt *Body,
SourceLocation WhileLoc, SourceLocation LParenLoc,
Expr *Cond, SourceLocation RParenLoc) {
return getSema().ActOnDoStmt(DoLoc, Body, WhileLoc, LParenLoc,
Cond, RParenLoc);
}
/// Build a new for statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
Stmt *Init, Sema::ConditionResult Cond,
Sema::FullExprArg Inc, SourceLocation RParenLoc,
Stmt *Body) {
return getSema().ActOnForStmt(ForLoc, LParenLoc, Init, Cond,
Inc, RParenLoc, Body);
}
/// Build a new goto statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc,
LabelDecl *Label) {
return getSema().ActOnGotoStmt(GotoLoc, LabelLoc, Label);
}
/// Build a new indirect goto statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildIndirectGotoStmt(SourceLocation GotoLoc,
SourceLocation StarLoc,
Expr *Target) {
return getSema().ActOnIndirectGotoStmt(GotoLoc, StarLoc, Target);
}
/// Build a new return statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildReturnStmt(SourceLocation ReturnLoc, Expr *Result) {
return getSema().BuildReturnStmt(ReturnLoc, Result);
}
/// Build a new declaration statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildDeclStmt(MutableArrayRef<Decl *> Decls,
SourceLocation StartLoc, SourceLocation EndLoc) {
Sema::DeclGroupPtrTy DG = getSema().BuildDeclaratorGroup(Decls);
return getSema().ActOnDeclStmt(DG, StartLoc, EndLoc);
}
/// Build a new inline asm statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
bool IsVolatile, unsigned NumOutputs,
unsigned NumInputs, IdentifierInfo **Names,
MultiExprArg Constraints, MultiExprArg Exprs,
Expr *AsmString, MultiExprArg Clobbers,
unsigned NumLabels,
SourceLocation RParenLoc) {
return getSema().ActOnGCCAsmStmt(AsmLoc, IsSimple, IsVolatile, NumOutputs,
NumInputs, Names, Constraints, Exprs,
AsmString, Clobbers, NumLabels, RParenLoc);
}
/// Build a new MS style inline asm statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildMSAsmStmt(SourceLocation AsmLoc, SourceLocation LBraceLoc,
ArrayRef<Token> AsmToks,
StringRef AsmString,
unsigned NumOutputs, unsigned NumInputs,
ArrayRef<StringRef> Constraints,
ArrayRef<StringRef> Clobbers,
ArrayRef<Expr*> Exprs,
SourceLocation EndLoc) {
return getSema().ActOnMSAsmStmt(AsmLoc, LBraceLoc, AsmToks, AsmString,
NumOutputs, NumInputs,
Constraints, Clobbers, Exprs, EndLoc);
}
/// Build a new co_return statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCoreturnStmt(SourceLocation CoreturnLoc, Expr *Result,
bool IsImplicit) {
return getSema().BuildCoreturnStmt(CoreturnLoc, Result, IsImplicit);
}
/// Build a new co_await expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCoawaitExpr(SourceLocation CoawaitLoc, Expr *Result,
bool IsImplicit) {
return getSema().BuildResolvedCoawaitExpr(CoawaitLoc, Result, IsImplicit);
}
/// Build a new co_await expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDependentCoawaitExpr(SourceLocation CoawaitLoc,
Expr *Result,
UnresolvedLookupExpr *Lookup) {
return getSema().BuildUnresolvedCoawaitExpr(CoawaitLoc, Result, Lookup);
}
/// Build a new co_yield expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCoyieldExpr(SourceLocation CoyieldLoc, Expr *Result) {
return getSema().BuildCoyieldExpr(CoyieldLoc, Result);
}
/// Build a new coroutine body statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs Args) {
return getSema().BuildCoroutineBodyStmt(Args);
}
/// Build a new Objective-C \@try statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtTryStmt(SourceLocation AtLoc,
Stmt *TryBody,
MultiStmtArg CatchStmts,
Stmt *Finally) {
return getSema().ActOnObjCAtTryStmt(AtLoc, TryBody, CatchStmts,
Finally);
}
/// Rebuild an Objective-C exception declaration.
///
/// By default, performs semantic analysis to build the new declaration.
/// Subclasses may override this routine to provide different behavior.
VarDecl *RebuildObjCExceptionDecl(VarDecl *ExceptionDecl,
TypeSourceInfo *TInfo, QualType T) {
return getSema().BuildObjCExceptionDecl(TInfo, T,
ExceptionDecl->getInnerLocStart(),
ExceptionDecl->getLocation(),
ExceptionDecl->getIdentifier());
}
/// Build a new Objective-C \@catch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtCatchStmt(SourceLocation AtLoc,
SourceLocation RParenLoc,
VarDecl *Var,
Stmt *Body) {
return getSema().ActOnObjCAtCatchStmt(AtLoc, RParenLoc,
Var, Body);
}
/// Build a new Objective-C \@finally statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtFinallyStmt(SourceLocation AtLoc,
Stmt *Body) {
return getSema().ActOnObjCAtFinallyStmt(AtLoc, Body);
}
/// Build a new Objective-C \@throw statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtThrowStmt(SourceLocation AtLoc,
Expr *Operand) {
return getSema().BuildObjCAtThrowStmt(AtLoc, Operand);
}
/// Build a new OpenMP Canonical loop.
///
/// Ensures that the outermost loop in \p LoopStmt is wrapped by an
/// OMPCanonicalLoop.
StmtResult RebuildOMPCanonicalLoop(Stmt *LoopStmt) {
return getSema().ActOnOpenMPCanonicalLoop(LoopStmt);
}
/// Build a new OpenMP executable directive.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind,
DeclarationNameInfo DirName,
OpenMPDirectiveKind CancelRegion,
ArrayRef<OMPClause *> Clauses,
Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPExecutableDirective(
Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);
}
/// Build a new OpenMP 'if' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPIfClause(OpenMPDirectiveKind NameModifier,
Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation NameModifierLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPIfClause(NameModifier, Condition, StartLoc,
LParenLoc, NameModifierLoc, ColonLoc,
EndLoc);
}
/// Build a new OpenMP 'final' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFinalClause(Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFinalClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'num_threads' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumThreadsClause(Expr *NumThreads,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumThreadsClause(NumThreads, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'safelen' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSafelenClause(Expr *Len, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSafelenClause(Len, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'simdlen' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSimdlenClause(Expr *Len, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSimdlenClause(Len, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'sizes' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSizesClause(ArrayRef<Expr *> Sizes,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'full' clause.
OMPClause *RebuildOMPFullClause(SourceLocation StartLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFullClause(StartLoc, EndLoc);
}
/// Build a new OpenMP 'partial' clause.
OMPClause *RebuildOMPPartialClause(Expr *Factor, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPartialClause(Factor, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'allocator' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAllocatorClause(Expr *A, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAllocatorClause(A, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'collapse' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCollapseClause(Expr *Num, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCollapseClause(Num, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'default' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDefaultClause(DefaultKind Kind, SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDefaultClause(Kind, KindKwLoc,
StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'proc_bind' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPProcBindClause(ProcBindKind Kind,
SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPProcBindClause(Kind, KindKwLoc,
StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'schedule' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPScheduleClause(
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
OpenMPScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation M1Loc, SourceLocation M2Loc,
SourceLocation KindLoc, SourceLocation CommaLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPScheduleClause(
M1, M2, Kind, ChunkSize, StartLoc, LParenLoc, M1Loc, M2Loc, KindLoc,
CommaLoc, EndLoc);
}
/// Build a new OpenMP 'ordered' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPOrderedClause(SourceLocation StartLoc,
SourceLocation EndLoc,
SourceLocation LParenLoc, Expr *Num) {
return getSema().ActOnOpenMPOrderedClause(StartLoc, EndLoc, LParenLoc, Num);
}
/// Build a new OpenMP 'private' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPPrivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPrivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'firstprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFirstprivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFirstprivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'lastprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPLastprivateClause(ArrayRef<Expr *> VarList,
OpenMPLastprivateModifier LPKind,
SourceLocation LPKindLoc,
SourceLocation ColonLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPLastprivateClause(
VarList, LPKind, LPKindLoc, ColonLoc, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'shared' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPSharedClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPSharedClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPReductionClause(
ArrayRef<Expr *> VarList, OpenMPReductionClauseModifier Modifier,
SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation ModifierLoc, SourceLocation ColonLoc,
SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPReductionClause(
VarList, Modifier, StartLoc, LParenLoc, ModifierLoc, ColonLoc, EndLoc,
ReductionIdScopeSpec, ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'task_reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPTaskReductionClause(
ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc,
CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPTaskReductionClause(
VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'in_reduction' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPInReductionClause(ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation ColonLoc,
SourceLocation EndLoc,
CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId,
ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPInReductionClause(
VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
ReductionId, UnresolvedReductions);
}
/// Build a new OpenMP 'linear' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPLinearClause(ArrayRef<Expr *> VarList, Expr *Step,
SourceLocation StartLoc,
SourceLocation LParenLoc,
OpenMPLinearClauseKind Modifier,
SourceLocation ModifierLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPLinearClause(VarList, Step, StartLoc, LParenLoc,
Modifier, ModifierLoc, ColonLoc,
EndLoc);
}
/// Build a new OpenMP 'aligned' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAlignedClause(ArrayRef<Expr *> VarList, Expr *Alignment,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAlignedClause(VarList, Alignment, StartLoc,
LParenLoc, ColonLoc, EndLoc);
}
/// Build a new OpenMP 'copyin' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCopyinClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCopyinClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'copyprivate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPCopyprivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPCopyprivateClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'flush' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFlushClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFlushClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'depobj' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDepobjClause(Expr *Depobj, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDepobjClause(Depobj, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'depend' pseudo clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind,
SourceLocation DepLoc, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPDependClause(DepModifier, DepKind, DepLoc,
ColonLoc, VarList, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'device' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDeviceClause(OpenMPDeviceClauseModifier Modifier,
Expr *Device, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ModifierLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDeviceClause(Modifier, Device, StartLoc,
LParenLoc, ModifierLoc, EndLoc);
}
/// Build a new OpenMP 'map' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPMapClause(
ArrayRef<OpenMPMapModifierKind> MapTypeModifiers,
ArrayRef<SourceLocation> MapTypeModifiersLoc,
CXXScopeSpec MapperIdScopeSpec, DeclarationNameInfo MapperId,
OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs, ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc,
MapperIdScopeSpec, MapperId, MapType,
IsMapTypeImplicit, MapLoc, ColonLoc,
VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'allocate' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAllocateClause(Expr *Allocate, ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPAllocateClause(Allocate, VarList, StartLoc,
LParenLoc, ColonLoc, EndLoc);
}
/// Build a new OpenMP 'num_teams' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumTeamsClause(NumTeams, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'thread_limit' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPThreadLimitClause(Expr *ThreadLimit,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPThreadLimitClause(ThreadLimit, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'priority' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPPriorityClause(Expr *Priority, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPPriorityClause(Priority, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'grainsize' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPGrainsizeClause(Expr *Grainsize, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPGrainsizeClause(Grainsize, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'num_tasks' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNumTasksClause(Expr *NumTasks, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNumTasksClause(NumTasks, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'hint' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPHintClause(Expr *Hint, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPHintClause(Hint, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'detach' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDetachClause(Expr *Evt, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDetachClause(Evt, StartLoc, LParenLoc, EndLoc);
}
/// Build a new OpenMP 'dist_schedule' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPDistScheduleClause(OpenMPDistScheduleClauseKind Kind,
Expr *ChunkSize, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation KindLoc,
SourceLocation CommaLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPDistScheduleClause(
Kind, ChunkSize, StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc);
}
/// Build a new OpenMP 'to' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPToClause(ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
ArrayRef<SourceLocation> MotionModifiersLoc,
CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperId, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, const OMPVarListLocTy &Locs,
ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPToClause(MotionModifiers, MotionModifiersLoc,
MapperIdScopeSpec, MapperId, ColonLoc,
VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'from' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *
RebuildOMPFromClause(ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
ArrayRef<SourceLocation> MotionModifiersLoc,
CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperId, SourceLocation ColonLoc,
ArrayRef<Expr *> VarList, const OMPVarListLocTy &Locs,
ArrayRef<Expr *> UnresolvedMappers) {
return getSema().ActOnOpenMPFromClause(
MotionModifiers, MotionModifiersLoc, MapperIdScopeSpec, MapperId,
ColonLoc, VarList, Locs, UnresolvedMappers);
}
/// Build a new OpenMP 'use_device_ptr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPUseDevicePtrClause(VarList, Locs);
}
/// Build a new OpenMP 'use_device_addr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseDeviceAddrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPUseDeviceAddrClause(VarList, Locs);
}
/// Build a new OpenMP 'is_device_ptr' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPIsDevicePtrClause(ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs) {
return getSema().ActOnOpenMPIsDevicePtrClause(VarList, Locs);
}
/// Build a new OpenMP 'defaultmap' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDefaultmapClause(OpenMPDefaultmapClauseModifier M,
OpenMPDefaultmapClauseKind Kind,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation MLoc,
SourceLocation KindLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDefaultmapClause(M, Kind, StartLoc, LParenLoc,
MLoc, KindLoc, EndLoc);
}
/// Build a new OpenMP 'nontemporal' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNontemporalClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNontemporalClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'inclusive' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPInclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPInclusiveClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'exclusive' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPExclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPExclusiveClause(VarList, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'uses_allocators' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUsesAllocatorsClause(
ArrayRef<Sema::UsesAllocatorsData> Data, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPUsesAllocatorClause(StartLoc, LParenLoc, EndLoc,
Data);
}
/// Build a new OpenMP 'affinity' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPAffinityClause(SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation ColonLoc,
SourceLocation EndLoc, Expr *Modifier,
ArrayRef<Expr *> Locators) {
return getSema().ActOnOpenMPAffinityClause(StartLoc, LParenLoc, ColonLoc,
EndLoc, Modifier, Locators);
}
/// Build a new OpenMP 'order' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPOrderClause(OpenMPOrderClauseKind Kind,
SourceLocation KindKwLoc,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPOrderClause(Kind, KindKwLoc, StartLoc,
LParenLoc, EndLoc);
}
/// Build a new OpenMP 'init' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPInitClause(Expr *InteropVar, ArrayRef<Expr *> PrefExprs,
bool IsTarget, bool IsTargetSync,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPInitClause(InteropVar, PrefExprs, IsTarget,
IsTargetSync, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'use' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPUseClause(Expr *InteropVar, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc, SourceLocation EndLoc) {
return getSema().ActOnOpenMPUseClause(InteropVar, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'destroy' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPDestroyClause(Expr *InteropVar, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation VarLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPDestroyClause(InteropVar, StartLoc, LParenLoc,
VarLoc, EndLoc);
}
/// Build a new OpenMP 'novariants' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNovariantsClause(Expr *Condition,
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNovariantsClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'nocontext' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPNocontextClause(Expr *Condition, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPNocontextClause(Condition, StartLoc, LParenLoc,
EndLoc);
}
/// Build a new OpenMP 'filter' clause.
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
OMPClause *RebuildOMPFilterClause(Expr *ThreadID, SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
return getSema().ActOnOpenMPFilterClause(ThreadID, StartLoc, LParenLoc,
EndLoc);
}
/// Rebuild the operand to an Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCAtSynchronizedOperand(SourceLocation atLoc,
Expr *object) {
return getSema().ActOnObjCAtSynchronizedOperand(atLoc, object);
}
/// Build a new Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAtSynchronizedStmt(SourceLocation AtLoc,
Expr *Object, Stmt *Body) {
return getSema().ActOnObjCAtSynchronizedStmt(AtLoc, Object, Body);
}
/// Build a new Objective-C \@autoreleasepool statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCAutoreleasePoolStmt(SourceLocation AtLoc,
Stmt *Body) {
return getSema().ActOnObjCAutoreleasePoolStmt(AtLoc, Body);
}
/// Build a new Objective-C fast enumeration statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildObjCForCollectionStmt(SourceLocation ForLoc,
Stmt *Element,
Expr *Collection,
SourceLocation RParenLoc,
Stmt *Body) {
StmtResult ForEachStmt = getSema().ActOnObjCForCollectionStmt(ForLoc,
Element,
Collection,
RParenLoc);
if (ForEachStmt.isInvalid())
return StmtError();
return getSema().FinishObjCForCollectionStmt(ForEachStmt.get(), Body);
}
/// Build a new C++ exception declaration.
///
/// By default, performs semantic analysis to build the new declaration.
/// Subclasses may override this routine to provide different behavior.
VarDecl *RebuildExceptionDecl(VarDecl *ExceptionDecl,
TypeSourceInfo *Declarator,
SourceLocation StartLoc,
SourceLocation IdLoc,
IdentifierInfo *Id) {
VarDecl *Var = getSema().BuildExceptionDeclaration(nullptr, Declarator,
StartLoc, IdLoc, Id);
if (Var)
getSema().CurContext->addDecl(Var);
return Var;
}
/// Build a new C++ catch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXCatchStmt(SourceLocation CatchLoc,
VarDecl *ExceptionDecl,
Stmt *Handler) {
return Owned(new (getSema().Context) CXXCatchStmt(CatchLoc, ExceptionDecl,
Handler));
}
/// Build a new C++ try statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXTryStmt(SourceLocation TryLoc, Stmt *TryBlock,
ArrayRef<Stmt *> Handlers) {
return getSema().ActOnCXXTryBlock(TryLoc, TryBlock, Handlers);
}
/// Build a new C++11 range-based for statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildCXXForRangeStmt(SourceLocation ForLoc,
SourceLocation CoawaitLoc, Stmt *Init,
SourceLocation ColonLoc, Stmt *Range,
Stmt *Begin, Stmt *End, Expr *Cond,
Expr *Inc, Stmt *LoopVar,
SourceLocation RParenLoc) {
// If we've just learned that the range is actually an Objective-C
// collection, treat this as an Objective-C fast enumeration loop.
if (DeclStmt *RangeStmt = dyn_cast<DeclStmt>(Range)) {
if (RangeStmt->isSingleDecl()) {
if (VarDecl *RangeVar = dyn_cast<VarDecl>(RangeStmt->getSingleDecl())) {
if (RangeVar->isInvalidDecl())
return StmtError();
Expr *RangeExpr = RangeVar->getInit();
if (!RangeExpr->isTypeDependent() &&
RangeExpr->getType()->isObjCObjectPointerType()) {
// FIXME: Support init-statements in Objective-C++20 ranged for
// statement.
if (Init) {
return SemaRef.Diag(Init->getBeginLoc(),
diag::err_objc_for_range_init_stmt)
<< Init->getSourceRange();
}
return getSema().ActOnObjCForCollectionStmt(ForLoc, LoopVar,
RangeExpr, RParenLoc);
}
}
}
}
return getSema().BuildCXXForRangeStmt(ForLoc, CoawaitLoc, Init, ColonLoc,
Range, Begin, End, Cond, Inc, LoopVar,
RParenLoc, Sema::BFRK_Rebuild);
}
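/// As a rough illustration, the Objective-C fast-enumeration rewrite above
/// fires when the transformed range turns out to have Objective-C object
/// pointer type, e.g.:
/// \code
///   NSArray *Items = ...;
///   for (id Obj : Items) { /* rebuilt as an ObjCForCollectionStmt */ }
/// \endcode
/// A C++20 init-statement before the range is rejected on this path via the
/// diagnostic emitted above.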
/// Build a new Microsoft __if_exists or __if_not_exists statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildMSDependentExistsStmt(SourceLocation KeywordLoc,
bool IsIfExists,
NestedNameSpecifierLoc QualifierLoc,
DeclarationNameInfo NameInfo,
Stmt *Nested) {
return getSema().BuildMSDependentExistsStmt(KeywordLoc, IsIfExists,
QualifierLoc, NameInfo, Nested);
}
/// Attach body to a C++11 range-based for statement.
///
/// By default, performs semantic analysis to finish the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult FinishCXXForRangeStmt(Stmt *ForRange, Stmt *Body) {
return getSema().FinishCXXForRangeStmt(ForRange, Body);
}
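/// Build a new SEH '__try' statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.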
StmtResult RebuildSEHTryStmt(bool IsCXXTry, SourceLocation TryLoc,
Stmt *TryBlock, Stmt *Handler) {
return getSema().ActOnSEHTryBlock(IsCXXTry, TryLoc, TryBlock, Handler);
}
StmtResult RebuildSEHExceptStmt(SourceLocation Loc, Expr *FilterExpr,
Stmt *Block) {
return getSema().ActOnSEHExceptBlock(Loc, FilterExpr, Block);
}
StmtResult RebuildSEHFinallyStmt(SourceLocation Loc, Stmt *Block) {
return SEHFinallyStmt::Create(getSema().getASTContext(), Loc, Block);
}
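/// Build a new SYCL unique stable name expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.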
ExprResult RebuildSYCLUniqueStableNameExpr(SourceLocation OpLoc,
SourceLocation LParen,
SourceLocation RParen,
TypeSourceInfo *TSI) {
return getSema().BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI);
}
/// Build a new predefined expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildPredefinedExpr(SourceLocation Loc,
PredefinedExpr::IdentKind IK) {
return getSema().BuildPredefinedExpr(Loc, IK);
}
/// Build a new expression that references a declaration.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDeclarationNameExpr(const CXXScopeSpec &SS,
LookupResult &R,
bool RequiresADL) {
return getSema().BuildDeclarationNameExpr(SS, R, RequiresADL);
}
/// Build a new expression that references a declaration.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDeclRefExpr(NestedNameSpecifierLoc QualifierLoc,
ValueDecl *VD,
const DeclarationNameInfo &NameInfo,
NamedDecl *Found,
TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return getSema().BuildDeclarationNameExpr(SS, NameInfo, VD, Found,
TemplateArgs);
}
/// Build a new expression in parentheses.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildParenExpr(Expr *SubExpr, SourceLocation LParen,
SourceLocation RParen) {
return getSema().ActOnParenExpr(LParen, RParen, SubExpr);
}
/// Build a new pseudo-destructor expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXPseudoDestructorExpr(Expr *Base,
SourceLocation OperatorLoc,
bool isArrow,
CXXScopeSpec &SS,
TypeSourceInfo *ScopeType,
SourceLocation CCLoc,
SourceLocation TildeLoc,
PseudoDestructorTypeStorage Destroyed);
/// Build a new unary operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryOperator(SourceLocation OpLoc,
UnaryOperatorKind Opc,
Expr *SubExpr) {
return getSema().BuildUnaryOp(/*Scope=*/nullptr, OpLoc, Opc, SubExpr);
}
/// Build a new builtin offsetof expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOffsetOfExpr(SourceLocation OperatorLoc,
TypeSourceInfo *Type,
ArrayRef<Sema::OffsetOfComponent> Components,
SourceLocation RParenLoc) {
return getSema().BuildBuiltinOffsetOf(OperatorLoc, Type, Components,
RParenLoc);
}
/// Build a new sizeof, alignof or vec_step expression with a
/// type argument.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryExprOrTypeTrait(TypeSourceInfo *TInfo,
SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind,
SourceRange R) {
return getSema().CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, R);
}
/// Build a new sizeof, alignof or vec_step expression with an
/// expression argument.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnaryExprOrTypeTrait(Expr *SubExpr, SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind,
SourceRange R) {
ExprResult Result
= getSema().CreateUnaryExprOrTypeTraitExpr(SubExpr, OpLoc, ExprKind);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// Build a new array subscript expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildArraySubscriptExpr(Expr *LHS,
SourceLocation LBracketLoc,
Expr *RHS,
SourceLocation RBracketLoc) {
return getSema().ActOnArraySubscriptExpr(/*Scope=*/nullptr, LHS,
LBracketLoc, RHS,
RBracketLoc);
}
/// Build a new matrix subscript expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
Expr *ColumnIdx,
SourceLocation RBracketLoc) {
return getSema().CreateBuiltinMatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
RBracketLoc);
}
/// Build a new array section expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPArraySectionExpr(Expr *Base, SourceLocation LBracketLoc,
Expr *LowerBound,
SourceLocation ColonLocFirst,
SourceLocation ColonLocSecond,
Expr *Length, Expr *Stride,
SourceLocation RBracketLoc) {
return getSema().ActOnOMPArraySectionExpr(Base, LBracketLoc, LowerBound,
ColonLocFirst, ColonLocSecond,
Length, Stride, RBracketLoc);
}
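/// As a rough illustration, the OpenMP array sections rebuilt here appear in
/// clauses such as 'map':
/// \code
///   #pragma omp target map(tofrom : A[0:N])   // [lower-bound : length]
///   // OpenMP 5.0 additionally allows a stride, e.g. A[0:N:2].
/// \endcode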
/// Build a new array shaping expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPArrayShapingExpr(Expr *Base, SourceLocation LParenLoc,
SourceLocation RParenLoc,
ArrayRef<Expr *> Dims,
ArrayRef<SourceRange> BracketsRanges) {
return getSema().ActOnOMPArrayShapingExpr(Base, LParenLoc, RParenLoc, Dims,
BracketsRanges);
}
/// Build a new iterator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildOMPIteratorExpr(
SourceLocation IteratorKwLoc, SourceLocation LLoc, SourceLocation RLoc,
ArrayRef<Sema::OMPIteratorData> Data) {
return getSema().ActOnOMPIteratorExpr(/*Scope=*/nullptr, IteratorKwLoc,
LLoc, RLoc, Data);
}
/// Build a new call expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCallExpr(Expr *Callee, SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
Expr *ExecConfig = nullptr) {
return getSema().ActOnCallExpr(
/*Scope=*/nullptr, Callee, LParenLoc, Args, RParenLoc, ExecConfig);
}
/// Build a new member access expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildMemberExpr(Expr *Base, SourceLocation OpLoc,
bool isArrow,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const DeclarationNameInfo &MemberNameInfo,
ValueDecl *Member,
NamedDecl *FoundDecl,
const TemplateArgumentListInfo *ExplicitTemplateArgs,
NamedDecl *FirstQualifierInScope) {
ExprResult BaseResult = getSema().PerformMemberExprBaseConversion(Base,
isArrow);
if (!Member->getDeclName()) {
// We have a reference to an unnamed field. This is always the
// base of an anonymous struct/union member access, i.e. the
// field is always of record type.
assert(Member->getType()->isRecordType() &&
"unnamed member not of record type?");
BaseResult =
getSema().PerformObjectMemberConversion(BaseResult.get(),
QualifierLoc.getNestedNameSpecifier(),
FoundDecl, Member);
if (BaseResult.isInvalid())
return ExprError();
Base = BaseResult.get();
CXXScopeSpec EmptySS;
return getSema().BuildFieldReferenceExpr(
Base, isArrow, OpLoc, EmptySS, cast<FieldDecl>(Member),
DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()), MemberNameInfo);
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
Base = BaseResult.get();
QualType BaseType = Base->getType();
if (isArrow && !BaseType->isPointerType())
return ExprError();
// FIXME: this involves duplicating earlier analysis in a lot of
// cases; we should avoid this when possible.
LookupResult R(getSema(), MemberNameInfo, Sema::LookupMemberName);
R.addDecl(FoundDecl);
R.resolveKind();
return getSema().BuildMemberReferenceExpr(Base, BaseType, OpLoc, isArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
R, ExplicitTemplateArgs,
/*S*/nullptr);
}
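/// As a rough illustration, the unnamed-member path above handles accesses
/// that go through an anonymous struct/union member, e.g.:
/// \code
///   struct S { union { int X; float F; }; };  // unnamed union field
///   int get(S &V) { return V.X; }             // base of '.X' is that field
/// \endcode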
/// Build a new binary operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildBinaryOperator(SourceLocation OpLoc,
BinaryOperatorKind Opc,
Expr *LHS, Expr *RHS) {
return getSema().BuildBinOp(/*Scope=*/nullptr, OpLoc, Opc, LHS, RHS);
}
/// Build a new rewritten operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXRewrittenBinaryOperator(
SourceLocation OpLoc, BinaryOperatorKind Opcode,
const UnresolvedSetImpl &UnqualLookups, Expr *LHS, Expr *RHS) {
return getSema().CreateOverloadedBinOp(OpLoc, Opcode, UnqualLookups, LHS,
RHS, /*RequiresADL*/false);
}
/// Build a new conditional operator expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildConditionalOperator(Expr *Cond,
SourceLocation QuestionLoc,
Expr *LHS,
SourceLocation ColonLoc,
Expr *RHS) {
return getSema().ActOnConditionalOp(QuestionLoc, ColonLoc, Cond,
LHS, RHS);
}
/// Build a new C-style cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCStyleCastExpr(SourceLocation LParenLoc,
TypeSourceInfo *TInfo,
SourceLocation RParenLoc,
Expr *SubExpr) {
return getSema().BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc,
SubExpr);
}
/// Build a new compound literal expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCompoundLiteralExpr(SourceLocation LParenLoc,
TypeSourceInfo *TInfo,
SourceLocation RParenLoc,
Expr *Init) {
return getSema().BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc,
Init);
}
/// Build a new extended vector element access expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildExtVectorElementExpr(Expr *Base,
SourceLocation OpLoc,
SourceLocation AccessorLoc,
IdentifierInfo &Accessor) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(&Accessor, AccessorLoc);
return getSema().BuildMemberReferenceExpr(Base, Base->getType(),
OpLoc, /*IsArrow*/ false,
SS, SourceLocation(),
/*FirstQualifierInScope*/ nullptr,
NameInfo,
/* TemplateArgs */ nullptr,
/*S*/ nullptr);
}
/// Build a new initializer list expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildInitList(SourceLocation LBraceLoc,
MultiExprArg Inits,
SourceLocation RBraceLoc) {
return SemaRef.BuildInitList(LBraceLoc, Inits, RBraceLoc);
}
/// Build a new designated initializer expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDesignatedInitExpr(Designation &Desig,
MultiExprArg ArrayExprs,
SourceLocation EqualOrColonLoc,
bool GNUSyntax,
Expr *Init) {
ExprResult Result
= SemaRef.ActOnDesignatedInitializer(Desig, EqualOrColonLoc, GNUSyntax,
Init);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// Build a new value-initialized expression.
///
/// By default, builds the implicit value initialization without performing
/// any semantic analysis. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildImplicitValueInitExpr(QualType T) {
return new (SemaRef.Context) ImplicitValueInitExpr(T);
}
/// Build a new \c va_arg expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildVAArgExpr(SourceLocation BuiltinLoc,
Expr *SubExpr, TypeSourceInfo *TInfo,
SourceLocation RParenLoc) {
return getSema().BuildVAArgExpr(BuiltinLoc,
SubExpr, TInfo,
RParenLoc);
}
/// Build a new expression list in parentheses.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildParenListExpr(SourceLocation LParenLoc,
MultiExprArg SubExprs,
SourceLocation RParenLoc) {
return getSema().ActOnParenListExpr(LParenLoc, RParenLoc, SubExprs);
}
/// Build a new address-of-label expression.
///
/// By default, performs semantic analysis, using the name of the label
/// rather than attempting to map the label statement itself.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildAddrLabelExpr(SourceLocation AmpAmpLoc,
SourceLocation LabelLoc, LabelDecl *Label) {
return getSema().ActOnAddrLabel(AmpAmpLoc, LabelLoc, Label);
}
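/// As a reminder, this rebuilds the GNU '&&label' extension, by label name
/// rather than by mapping the LabelStmt itself, e.g.:
/// \code
///   void *Target = &&done;
///   goto *Target;
/// done:;
/// \endcode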
/// Build a new GNU statement expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildStmtExpr(SourceLocation LParenLoc, Stmt *SubStmt,
SourceLocation RParenLoc, unsigned TemplateDepth) {
return getSema().BuildStmtExpr(LParenLoc, SubStmt, RParenLoc,
TemplateDepth);
}
/// Build a new __builtin_choose_expr expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildChooseExpr(SourceLocation BuiltinLoc,
Expr *Cond, Expr *LHS, Expr *RHS,
SourceLocation RParenLoc) {
return SemaRef.ActOnChooseExpr(BuiltinLoc,
Cond, LHS, RHS,
RParenLoc);
}
/// Build a new generic selection expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildGenericSelectionExpr(SourceLocation KeyLoc,
SourceLocation DefaultLoc,
SourceLocation RParenLoc,
Expr *ControllingExpr,
ArrayRef<TypeSourceInfo *> Types,
ArrayRef<Expr *> Exprs) {
return getSema().CreateGenericSelectionExpr(KeyLoc, DefaultLoc, RParenLoc,
ControllingExpr, Types, Exprs);
}
/// Build a new overloaded operator call expression.
///
/// By default, performs semantic analysis to build the new expression.
/// The semantic analysis provides the behavior of template instantiation,
/// copying with transformations that turn what looks like an overloaded
/// operator call into a use of a builtin operator, performing
/// argument-dependent lookup, etc. Subclasses may override this routine to
/// provide different behavior.
ExprResult RebuildCXXOperatorCallExpr(OverloadedOperatorKind Op,
SourceLocation OpLoc,
Expr *Callee,
Expr *First,
Expr *Second);
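/// As a sketch, this hook is exercised when instantiation resolves a
/// dependent operator expression either to an overload or to a builtin
/// operator:
/// \code
///   template <typename T> auto add(T A, T B) { return A + B; }
///   // add<std::string> rebuilds a CXXOperatorCallExpr (operator+),
///   // add<int> rebuilds a plain builtin BinaryOperator.
/// \endcode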
/// Build a new C++ "named" cast expression, such as static_cast or
/// reinterpret_cast.
///
/// By default, this routine dispatches to one of the more-specific routines
/// for a particular named cast, e.g., RebuildCXXStaticCastExpr().
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNamedCastExpr(SourceLocation OpLoc,
Stmt::StmtClass Class,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
switch (Class) {
case Stmt::CXXStaticCastExprClass:
return getDerived().RebuildCXXStaticCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXDynamicCastExprClass:
return getDerived().RebuildCXXDynamicCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXReinterpretCastExprClass:
return getDerived().RebuildCXXReinterpretCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr,
RParenLoc);
case Stmt::CXXConstCastExprClass:
return getDerived().RebuildCXXConstCastExpr(OpLoc, LAngleLoc, TInfo,
RAngleLoc, LParenLoc,
SubExpr, RParenLoc);
case Stmt::CXXAddrspaceCastExprClass:
return getDerived().RebuildCXXAddrspaceCastExpr(
OpLoc, LAngleLoc, TInfo, RAngleLoc, LParenLoc, SubExpr, RParenLoc);
default:
llvm_unreachable("Invalid C++ named cast");
}
}
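/// For reference, the dispatch above covers the named casts
/// \code
///   static_cast<T>(E)  dynamic_cast<T&>(R)  reinterpret_cast<T*>(P)
///   const_cast<T&>(C)  addrspace_cast<T>(Q)   // OpenCL extension
/// \endcode
/// Any other statement class is a programming error (hence the
/// llvm_unreachable).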
/// Build a new C++ static_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXStaticCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_static_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ dynamic_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDynamicCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_dynamic_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ reinterpret_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXReinterpretCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_reinterpret_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ const_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXConstCastExpr(SourceLocation OpLoc,
SourceLocation LAngleLoc,
TypeSourceInfo *TInfo,
SourceLocation RAngleLoc,
SourceLocation LParenLoc,
Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(OpLoc, tok::kw_const_cast,
TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc),
SourceRange(LParenLoc, RParenLoc));
}
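/// Build a new C++ addrspace_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.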
ExprResult
RebuildCXXAddrspaceCastExpr(SourceLocation OpLoc, SourceLocation LAngleLoc,
TypeSourceInfo *TInfo, SourceLocation RAngleLoc,
SourceLocation LParenLoc, Expr *SubExpr,
SourceLocation RParenLoc) {
return getSema().BuildCXXNamedCast(
OpLoc, tok::kw_addrspace_cast, TInfo, SubExpr,
SourceRange(LAngleLoc, RAngleLoc), SourceRange(LParenLoc, RParenLoc));
}
/// Build a new C++ functional-style cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXFunctionalCastExpr(TypeSourceInfo *TInfo,
SourceLocation LParenLoc,
Expr *Sub,
SourceLocation RParenLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(TInfo, LParenLoc,
MultiExprArg(&Sub, 1), RParenLoc,
ListInitialization);
}
/// Build a new C++ __builtin_bit_cast expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildBuiltinBitCastExpr(SourceLocation KWLoc,
TypeSourceInfo *TSI, Expr *Sub,
SourceLocation RParenLoc) {
return getSema().BuildBuiltinBitCastExpr(KWLoc, TSI, Sub, RParenLoc);
}
/// Build a new C++ typeid(type) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTypeidExpr(QualType TypeInfoType,
SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeId(TypeInfoType, TypeidLoc, Operand,
RParenLoc);
}
/// Build a new C++ typeid(expr) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTypeidExpr(QualType TypeInfoType,
SourceLocation TypeidLoc,
Expr *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeId(TypeInfoType, TypeidLoc, Operand,
RParenLoc);
}
/// Build a new C++ __uuidof(type) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUuidofExpr(QualType Type, SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
return getSema().BuildCXXUuidof(Type, TypeidLoc, Operand, RParenLoc);
}
/// Build a new C++ __uuidof(expr) expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUuidofExpr(QualType Type, SourceLocation TypeidLoc,
Expr *Operand, SourceLocation RParenLoc) {
return getSema().BuildCXXUuidof(Type, TypeidLoc, Operand, RParenLoc);
}
/// Build a new C++ "this" expression.
///
/// By default, builds a new "this" expression without performing any
/// semantic analysis. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc,
QualType ThisType,
bool isImplicit) {
return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit);
}
/// Build a new C++ throw expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXThrowExpr(SourceLocation ThrowLoc, Expr *Sub,
bool IsThrownVariableInScope) {
return getSema().BuildCXXThrow(ThrowLoc, Sub, IsThrownVariableInScope);
}
/// Build a new C++ default-argument expression.
///
/// By default, builds a new default-argument expression, which does not
/// require any semantic analysis. Subclasses may override this routine to
/// provide different behavior.
ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) {
return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param,
getSema().CurContext);
}
/// Build a new C++11 default-initialization expression.
///
/// By default, builds a new default field initialization expression, which
/// does not require any semantic analysis. Subclasses may override this
/// routine to provide different behavior.
ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc,
FieldDecl *Field) {
return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field,
getSema().CurContext);
}
/// Build a new C++ zero-initialization expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXScalarValueInitExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenLoc,
SourceLocation RParenLoc) {
return getSema().BuildCXXTypeConstructExpr(
TSInfo, LParenLoc, None, RParenLoc, /*ListInitialization=*/false);
}
/// Build a new C++ "new" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNewExpr(SourceLocation StartLoc,
bool UseGlobal,
SourceLocation PlacementLParen,
MultiExprArg PlacementArgs,
SourceLocation PlacementRParen,
SourceRange TypeIdParens,
QualType AllocatedType,
TypeSourceInfo *AllocatedTypeInfo,
Optional<Expr *> ArraySize,
SourceRange DirectInitRange,
Expr *Initializer) {
return getSema().BuildCXXNew(StartLoc, UseGlobal,
PlacementLParen,
PlacementArgs,
PlacementRParen,
TypeIdParens,
AllocatedType,
AllocatedTypeInfo,
ArraySize,
DirectInitRange,
Initializer);
}
/// Build a new C++ "delete" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDeleteExpr(SourceLocation StartLoc,
bool IsGlobalDelete,
bool IsArrayForm,
Expr *Operand) {
return getSema().ActOnCXXDelete(StartLoc, IsGlobalDelete, IsArrayForm,
Operand);
}
/// Build a new type trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildTypeTrait(TypeTrait Trait,
SourceLocation StartLoc,
ArrayRef<TypeSourceInfo *> Args,
SourceLocation RParenLoc) {
return getSema().BuildTypeTrait(Trait, StartLoc, Args, RParenLoc);
}
/// Build a new array type trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildArrayTypeTrait(ArrayTypeTrait Trait,
SourceLocation StartLoc,
TypeSourceInfo *TSInfo,
Expr *DimExpr,
SourceLocation RParenLoc) {
return getSema().BuildArrayTypeTrait(Trait, StartLoc, TSInfo, DimExpr, RParenLoc);
}
/// Build a new expression trait expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildExpressionTrait(ExpressionTrait Trait,
SourceLocation StartLoc,
Expr *Queried,
SourceLocation RParenLoc) {
return getSema().BuildExpressionTrait(Trait, StartLoc, Queried, RParenLoc);
}
/// Build a new (previously unresolved) declaration reference
/// expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildDependentScopeDeclRefExpr(
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
const DeclarationNameInfo &NameInfo,
const TemplateArgumentListInfo *TemplateArgs,
bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
if (TemplateArgs || TemplateKWLoc.isValid())
return getSema().BuildQualifiedTemplateIdExpr(SS, TemplateKWLoc, NameInfo,
TemplateArgs);
return getSema().BuildQualifiedDeclarationNameExpr(
SS, NameInfo, IsAddressOfOperand, /*S*/nullptr, RecoveryTSI);
}
/// Build a new template-id expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildTemplateIdExpr(const CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
LookupResult &R,
bool RequiresADL,
const TemplateArgumentListInfo *TemplateArgs) {
return getSema().BuildTemplateIdExpr(SS, TemplateKWLoc, R, RequiresADL,
TemplateArgs);
}
/// Build a new object-construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXConstructExpr(QualType T,
SourceLocation Loc,
CXXConstructorDecl *Constructor,
bool IsElidable,
MultiExprArg Args,
bool HadMultipleCandidates,
bool ListInitialization,
bool StdInitListInitialization,
bool RequiresZeroInit,
CXXConstructExpr::ConstructionKind ConstructKind,
SourceRange ParenRange) {
// Reconstruct the constructor we originally found, which might be
// different if this is a call to an inherited constructor.
CXXConstructorDecl *FoundCtor = Constructor;
if (Constructor->isInheritingConstructor())
FoundCtor = Constructor->getInheritedConstructor().getConstructor();
SmallVector<Expr *, 8> ConvertedArgs;
if (getSema().CompleteConstructorCall(FoundCtor, T, Args, Loc,
ConvertedArgs))
return ExprError();
return getSema().BuildCXXConstructExpr(Loc, T, Constructor,
IsElidable,
ConvertedArgs,
HadMultipleCandidates,
ListInitialization,
StdInitListInitialization,
RequiresZeroInit, ConstructKind,
ParenRange);
}
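/// As a brief illustration of the inherited-constructor case handled above:
/// \code
///   struct Base { Base(int); };
///   struct Derived : Base { using Base::Base; };
///   Derived D(42);  // Constructor is the inheriting constructor; argument
///                   // checking is completed against Base::Base(int).
/// \endcode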
/// Build a new implicit construction via inherited constructor
/// expression.
ExprResult RebuildCXXInheritedCtorInitExpr(QualType T, SourceLocation Loc,
CXXConstructorDecl *Constructor,
bool ConstructsVBase,
bool InheritedFromVBase) {
return new (getSema().Context) CXXInheritedCtorInitExpr(
Loc, T, Constructor, ConstructsVBase, InheritedFromVBase);
}
/// Build a new C++ temporary-object construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXTemporaryObjectExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenOrBraceLoc,
MultiExprArg Args,
SourceLocation RParenOrBraceLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(
TSInfo, LParenOrBraceLoc, Args, RParenOrBraceLoc, ListInitialization);
}
/// Build a new type-dependent (unresolved) object-construction expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXUnresolvedConstructExpr(TypeSourceInfo *TSInfo,
SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
bool ListInitialization) {
return getSema().BuildCXXTypeConstructExpr(TSInfo, LParenLoc, Args,
RParenLoc, ListInitialization);
}
/// Build a new member reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXDependentScopeMemberExpr(Expr *BaseE,
QualType BaseType,
bool IsArrow,
SourceLocation OperatorLoc,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
NamedDecl *FirstQualifierInScope,
const DeclarationNameInfo &MemberNameInfo,
const TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return SemaRef.BuildMemberReferenceExpr(BaseE, BaseType,
OperatorLoc, IsArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
MemberNameInfo,
TemplateArgs, /*S*/nullptr);
}
/// Build a new member reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildUnresolvedMemberExpr(Expr *BaseE, QualType BaseType,
SourceLocation OperatorLoc,
bool IsArrow,
NestedNameSpecifierLoc QualifierLoc,
SourceLocation TemplateKWLoc,
NamedDecl *FirstQualifierInScope,
LookupResult &R,
const TemplateArgumentListInfo *TemplateArgs) {
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
return SemaRef.BuildMemberReferenceExpr(BaseE, BaseType,
OperatorLoc, IsArrow,
SS, TemplateKWLoc,
FirstQualifierInScope,
R, TemplateArgs, /*S*/nullptr);
}
/// Build a new noexcept expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildCXXNoexceptExpr(SourceRange Range, Expr *Arg) {
return SemaRef.BuildCXXNoexceptExpr(Range.getBegin(), Arg, Range.getEnd());
}
/// Build a new expression to compute the length of a parameter pack.
ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc,
NamedDecl *Pack,
SourceLocation PackLoc,
SourceLocation RParenLoc,
Optional<unsigned> Length,
ArrayRef<TemplateArgument> PartialArgs) {
return SizeOfPackExpr::Create(SemaRef.Context, OperatorLoc, Pack, PackLoc,
RParenLoc, Length, PartialArgs);
}
/// Build a new expression representing a call to a source location
/// builtin.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildSourceLocExpr(SourceLocExpr::IdentKind Kind,
SourceLocation BuiltinLoc,
SourceLocation RPLoc,
DeclContext *ParentContext) {
return getSema().BuildSourceLocExpr(Kind, BuiltinLoc, RPLoc, ParentContext);
}
/// Build a new concept specialization expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildConceptSpecializationExpr(NestedNameSpecifierLoc NNS,
SourceLocation TemplateKWLoc, DeclarationNameInfo ConceptNameInfo,
NamedDecl *FoundDecl, ConceptDecl *NamedConcept,
TemplateArgumentListInfo *TALI) {
CXXScopeSpec SS;
SS.Adopt(NNS);
ExprResult Result = getSema().CheckConceptTemplateId(SS, TemplateKWLoc,
ConceptNameInfo,
FoundDecl,
NamedConcept, TALI);
if (Result.isInvalid())
return ExprError();
return Result;
}
/// \brief Build a new requires expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildRequiresExpr(SourceLocation RequiresKWLoc,
RequiresExprBodyDecl *Body,
ArrayRef<ParmVarDecl *> LocalParameters,
ArrayRef<concepts::Requirement *> Requirements,
SourceLocation ClosingBraceLoc) {
return RequiresExpr::Create(SemaRef.Context, RequiresKWLoc, Body,
LocalParameters, Requirements, ClosingBraceLoc);
}
concepts::TypeRequirement *
RebuildTypeRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag) {
return SemaRef.BuildTypeRequirement(SubstDiag);
}
concepts::TypeRequirement *RebuildTypeRequirement(TypeSourceInfo *T) {
return SemaRef.BuildTypeRequirement(T);
}
concepts::ExprRequirement *
RebuildExprRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag, bool IsSimple,
SourceLocation NoexceptLoc,
concepts::ExprRequirement::ReturnTypeRequirement Ret) {
return SemaRef.BuildExprRequirement(SubstDiag, IsSimple, NoexceptLoc,
std::move(Ret));
}
concepts::ExprRequirement *
RebuildExprRequirement(Expr *E, bool IsSimple, SourceLocation NoexceptLoc,
concepts::ExprRequirement::ReturnTypeRequirement Ret) {
return SemaRef.BuildExprRequirement(E, IsSimple, NoexceptLoc,
std::move(Ret));
}
concepts::NestedRequirement *
RebuildNestedRequirement(
concepts::Requirement::SubstitutionDiagnostic *SubstDiag) {
return SemaRef.BuildNestedRequirement(SubstDiag);
}
concepts::NestedRequirement *RebuildNestedRequirement(Expr *Constraint) {
return SemaRef.BuildNestedRequirement(Constraint);
}
/// \brief Build a new Objective-C boxed expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
return getSema().BuildObjCBoxedExpr(SR, ValueExpr);
}
/// Build a new Objective-C array literal.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCArrayLiteral(SourceRange Range,
Expr **Elements, unsigned NumElements) {
return getSema().BuildObjCArrayLiteral(Range,
MultiExprArg(Elements, NumElements));
}
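/// Build a new Objective-C subscripting reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.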
ExprResult RebuildObjCSubscriptRefExpr(SourceLocation RB,
Expr *Base, Expr *Key,
ObjCMethodDecl *getterMethod,
ObjCMethodDecl *setterMethod) {
return getSema().BuildObjCSubscriptExpression(RB, Base, Key,
getterMethod, setterMethod);
}
/// Build a new Objective-C dictionary literal.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCDictionaryLiteral(SourceRange Range,
MutableArrayRef<ObjCDictionaryElement> Elements) {
return getSema().BuildObjCDictionaryLiteral(Range, Elements);
}
/// Build a new Objective-C \@encode expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCEncodeExpr(SourceLocation AtLoc,
TypeSourceInfo *EncodeTypeInfo,
SourceLocation RParenLoc) {
return SemaRef.BuildObjCEncodeExpression(AtLoc, EncodeTypeInfo, RParenLoc);
}
/// Build a new Objective-C class message.
ExprResult RebuildObjCMessageExpr(TypeSourceInfo *ReceiverTypeInfo,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return SemaRef.BuildClassMessage(ReceiverTypeInfo,
ReceiverTypeInfo->getType(),
/*SuperLoc=*/SourceLocation(),
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C instance message.
ExprResult RebuildObjCMessageExpr(Expr *Receiver,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return SemaRef.BuildInstanceMessage(Receiver,
Receiver->getType(),
/*SuperLoc=*/SourceLocation(),
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C instance/class message to 'super'.
ExprResult RebuildObjCMessageExpr(SourceLocation SuperLoc,
Selector Sel,
ArrayRef<SourceLocation> SelectorLocs,
QualType SuperType,
ObjCMethodDecl *Method,
SourceLocation LBracLoc,
MultiExprArg Args,
SourceLocation RBracLoc) {
return Method->isInstanceMethod() ? SemaRef.BuildInstanceMessage(nullptr,
SuperType,
SuperLoc,
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args)
: SemaRef.BuildClassMessage(nullptr,
SuperType,
SuperLoc,
Sel, Method, LBracLoc, SelectorLocs,
RBracLoc, Args);
}
/// Build a new Objective-C ivar reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCIvarRefExpr(Expr *BaseArg, ObjCIvarDecl *Ivar,
SourceLocation IvarLoc,
bool IsArrow, bool IsFreeIvar) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(Ivar->getDeclName(), IvarLoc);
ExprResult Result = getSema().BuildMemberReferenceExpr(
BaseArg, BaseArg->getType(),
/*FIXME:*/ IvarLoc, IsArrow, SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr, NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
if (IsFreeIvar && Result.isUsable())
cast<ObjCIvarRefExpr>(Result.get())->setIsFreeIvar(IsFreeIvar);
return Result;
}
/// Build a new Objective-C property reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCPropertyRefExpr(Expr *BaseArg,
ObjCPropertyDecl *Property,
SourceLocation PropertyLoc) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(Property->getDeclName(), PropertyLoc);
return getSema().BuildMemberReferenceExpr(BaseArg, BaseArg->getType(),
/*FIXME:*/PropertyLoc,
/*IsArrow=*/false,
SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
}
/// Build a new Objective-C property reference expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCPropertyRefExpr(Expr *Base, QualType T,
ObjCMethodDecl *Getter,
ObjCMethodDecl *Setter,
SourceLocation PropertyLoc) {
// Since these expressions can only be value-dependent, we do not
// need to perform semantic analysis again.
return Owned(
new (getSema().Context) ObjCPropertyRefExpr(Getter, Setter, T,
VK_LValue, OK_ObjCProperty,
PropertyLoc, Base));
}
/// Build a new Objective-C "isa" expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildObjCIsaExpr(Expr *BaseArg, SourceLocation IsaLoc,
SourceLocation OpLoc, bool IsArrow) {
CXXScopeSpec SS;
DeclarationNameInfo NameInfo(&getSema().Context.Idents.get("isa"), IsaLoc);
return getSema().BuildMemberReferenceExpr(BaseArg, BaseArg->getType(),
OpLoc, IsArrow,
SS, SourceLocation(),
/*FirstQualifierInScope=*/nullptr,
NameInfo,
/*TemplateArgs=*/nullptr,
/*S=*/nullptr);
}
/// Build a new shuffle vector expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildShuffleVectorExpr(SourceLocation BuiltinLoc,
MultiExprArg SubExprs,
SourceLocation RParenLoc) {
// Find the declaration for __builtin_shufflevector
const IdentifierInfo &Name
= SemaRef.Context.Idents.get("__builtin_shufflevector");
TranslationUnitDecl *TUDecl = SemaRef.Context.getTranslationUnitDecl();
DeclContext::lookup_result Lookup = TUDecl->lookup(DeclarationName(&Name));
assert(!Lookup.empty() && "No __builtin_shufflevector?");
// Build a reference to the __builtin_shufflevector builtin
FunctionDecl *Builtin = cast<FunctionDecl>(Lookup.front());
Expr *Callee = new (SemaRef.Context)
DeclRefExpr(SemaRef.Context, Builtin, false,
SemaRef.Context.BuiltinFnTy, VK_PRValue, BuiltinLoc);
QualType CalleePtrTy = SemaRef.Context.getPointerType(Builtin->getType());
Callee = SemaRef.ImpCastExprToType(Callee, CalleePtrTy,
CK_BuiltinFnToFnPtr).get();
// Build the CallExpr
ExprResult TheCall = CallExpr::Create(
SemaRef.Context, Callee, SubExprs, Builtin->getCallResultType(),
Expr::getValueKindForType(Builtin->getReturnType()), RParenLoc,
FPOptionsOverride());
// Type-check the __builtin_shufflevector expression.
return SemaRef.SemaBuiltinShuffleVector(cast<CallExpr>(TheCall.get()));
}
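// The sequence above mirrors what Sema normally does for a call to a
// custom-type-checked builtin: __builtin_shufflevector is looked up in the
// translation unit, a DeclRefExpr of the special BuiltinFnTy is decayed to a
// function pointer via CK_BuiltinFnToFnPtr, a CallExpr is created over the
// already-transformed arguments, and SemaBuiltinShuffleVector then re-runs
// the builtin's own type checking, since the builtin's result type depends
// on its argument types.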
/// Build a new convert vector expression.
ExprResult RebuildConvertVectorExpr(SourceLocation BuiltinLoc,
Expr *SrcExpr, TypeSourceInfo *DstTInfo,
SourceLocation RParenLoc) {
return SemaRef.SemaConvertVectorExpr(SrcExpr, DstTInfo,
BuiltinLoc, RParenLoc);
}
/// Build a new template argument pack expansion.
///
/// By default, performs semantic analysis to build a new pack expansion
/// for a template argument. Subclasses may override this routine to provide
/// different behavior.
TemplateArgumentLoc RebuildPackExpansion(TemplateArgumentLoc Pattern,
SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
switch (Pattern.getArgument().getKind()) {
case TemplateArgument::Expression: {
ExprResult Result
= getSema().CheckPackExpansion(Pattern.getSourceExpression(),
EllipsisLoc, NumExpansions);
if (Result.isInvalid())
return TemplateArgumentLoc();
return TemplateArgumentLoc(Result.get(), Result.get());
}
case TemplateArgument::Template:
return TemplateArgumentLoc(
SemaRef.Context,
TemplateArgument(Pattern.getArgument().getAsTemplate(),
NumExpansions),
Pattern.getTemplateQualifierLoc(), Pattern.getTemplateNameLoc(),
EllipsisLoc);
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::Pack:
case TemplateArgument::TemplateExpansion:
case TemplateArgument::NullPtr:
llvm_unreachable("Pack expansion pattern has no parameter packs");
case TemplateArgument::Type:
if (TypeSourceInfo *Expansion
= getSema().CheckPackExpansion(Pattern.getTypeSourceInfo(),
EllipsisLoc,
NumExpansions))
return TemplateArgumentLoc(TemplateArgument(Expansion->getType()),
Expansion);
break;
}
return TemplateArgumentLoc();
}
/// Build a new expression pack expansion.
///
/// By default, performs semantic analysis to build a new pack expansion
/// for an expression. Subclasses may override this routine to provide
/// different behavior.
ExprResult RebuildPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
return getSema().CheckPackExpansion(Pattern, EllipsisLoc, NumExpansions);
}
/// Build a new C++1z fold-expression.
///
/// By default, performs semantic analysis in order to build a new fold
/// expression.
ExprResult RebuildCXXFoldExpr(UnresolvedLookupExpr *ULE,
SourceLocation LParenLoc, Expr *LHS,
BinaryOperatorKind Operator,
SourceLocation EllipsisLoc, Expr *RHS,
SourceLocation RParenLoc,
Optional<unsigned> NumExpansions) {
return getSema().BuildCXXFoldExpr(ULE, LParenLoc, LHS, Operator,
EllipsisLoc, RHS, RParenLoc,
NumExpansions);
}
/// Build an empty C++1z fold-expression with the given operator.
///
/// By default, produces the fallback value for the fold-expression, or
/// produces an error if there is no fallback value.
ExprResult RebuildEmptyCXXFoldExpr(SourceLocation EllipsisLoc,
BinaryOperatorKind Operator) {
return getSema().BuildEmptyCXXFoldExpr(EllipsisLoc, Operator);
}
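// As an illustration of the fallback values (roughly C++17 [temp.variadic]):
// a unary fold over '&&' collapses to 'true' for an empty pack, over '||' to
// 'false', and over ',' to 'void()'; any other operator makes an empty
// expansion ill-formed, which is when this routine emits the error. E.g.:
//
//   template <typename... Ts> bool all(Ts... ts) { return (ts && ...); }
//   bool b = all();   // empty pack: the fold rebuilds to 'true'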
/// Build a new atomic operation expression.
///
/// By default, performs semantic analysis to build the new expression.
/// Subclasses may override this routine to provide different behavior.
ExprResult RebuildAtomicExpr(SourceLocation BuiltinLoc, MultiExprArg SubExprs,
AtomicExpr::AtomicOp Op,
SourceLocation RParenLoc) {
// Use this for all of the locations, since we don't know the difference
// between the call and the expr at this point.
SourceRange Range{BuiltinLoc, RParenLoc};
return getSema().BuildAtomicExpr(Range, Range, RParenLoc, SubExprs, Op,
Sema::AtomicArgumentOrder::AST);
}
ExprResult RebuildRecoveryExpr(SourceLocation BeginLoc, SourceLocation EndLoc,
ArrayRef<Expr *> SubExprs, QualType Type) {
return getSema().CreateRecoveryExpr(BeginLoc, EndLoc, SubExprs, Type);
}
private:
TypeLoc TransformTypeInObjectScope(TypeLoc TL,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
TypeSourceInfo *TransformTypeInObjectScope(TypeSourceInfo *TSInfo,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
TypeSourceInfo *TransformTSIInObjectScope(TypeLoc TL, QualType ObjectType,
NamedDecl *FirstQualifierInScope,
CXXScopeSpec &SS);
QualType TransformDependentNameType(TypeLocBuilder &TLB,
DependentNameTypeLoc TL,
bool DeducibleTSTContext);
};
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformStmt(Stmt *S, StmtDiscardKind SDK) {
if (!S)
return S;
switch (S->getStmtClass()) {
case Stmt::NoStmtClass: break;
// Transform individual statement nodes
// Pass SDK into statements that can produce a value
#define STMT(Node, Parent) \
case Stmt::Node##Class: return getDerived().Transform##Node(cast<Node>(S));
#define VALUESTMT(Node, Parent) \
case Stmt::Node##Class: \
return getDerived().Transform##Node(cast<Node>(S), SDK);
#define ABSTRACT_STMT(Node)
#define EXPR(Node, Parent)
#include "clang/AST/StmtNodes.inc"
// Transform expressions by calling TransformExpr.
#define STMT(Node, Parent)
#define ABSTRACT_STMT(Stmt)
#define EXPR(Node, Parent) case Stmt::Node##Class:
#include "clang/AST/StmtNodes.inc"
{
ExprResult E = getDerived().TransformExpr(cast<Expr>(S));
if (SDK == SDK_StmtExprResult)
E = getSema().ActOnStmtExprResult(E);
return getSema().ActOnExprStmt(E, SDK == SDK_Discarded);
}
}
return S;
}
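// The dispatch above is generated from the StmtNodes.inc x-macro table. For
// a concrete statement class such as ForStmt, the STMT() entry expands, in
// effect, to
//
//   case Stmt::ForStmtClass:
//     return getDerived().TransformForStmt(cast<ForStmt>(S));
//
// Statements that can carry a value additionally receive the
// StmtDiscardKind through the VALUESTMT() entry, and all expression classes
// funnel into the trailing block so that an expression in statement position
// is transformed as an expression and wrapped back into an expression
// statement (or a statement-expression result) as requested by SDK.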
template<typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPClause(OMPClause *S) {
if (!S)
return S;
switch (S->getClauseKind()) {
default: break;
// Transform individual clause nodes
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) \
case Enum: \
return getDerived().Transform##Class(cast<Class>(S));
#include "llvm/Frontend/OpenMP/OMP.inc"
}
return S;
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformExpr(Expr *E) {
if (!E)
return E;
switch (E->getStmtClass()) {
case Stmt::NoStmtClass: break;
#define STMT(Node, Parent) case Stmt::Node##Class: break;
#define ABSTRACT_STMT(Stmt)
#define EXPR(Node, Parent) \
case Stmt::Node##Class: return getDerived().Transform##Node(cast<Node>(E));
#include "clang/AST/StmtNodes.inc"
}
return E;
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformInitializer(Expr *Init,
bool NotCopyInit) {
// Initializers are instantiated like expressions, except that various outer
// layers are stripped.
if (!Init)
return Init;
if (auto *FE = dyn_cast<FullExpr>(Init))
Init = FE->getSubExpr();
if (auto *AIL = dyn_cast<ArrayInitLoopExpr>(Init))
Init = AIL->getCommonExpr();
if (MaterializeTemporaryExpr *MTE = dyn_cast<MaterializeTemporaryExpr>(Init))
Init = MTE->getSubExpr();
while (CXXBindTemporaryExpr *Binder = dyn_cast<CXXBindTemporaryExpr>(Init))
Init = Binder->getSubExpr();
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Init))
Init = ICE->getSubExprAsWritten();
if (CXXStdInitializerListExpr *ILE =
dyn_cast<CXXStdInitializerListExpr>(Init))
return TransformInitializer(ILE->getSubExpr(), NotCopyInit);
// If this is copy-initialization, we only need to reconstruct
// InitListExprs. Other forms of copy-initialization will be a no-op if
// the initializer is already the right type.
CXXConstructExpr *Construct = dyn_cast<CXXConstructExpr>(Init);
if (!NotCopyInit && !(Construct && Construct->isListInitialization()))
return getDerived().TransformExpr(Init);
// Revert value-initialization back to empty parens.
if (CXXScalarValueInitExpr *VIE = dyn_cast<CXXScalarValueInitExpr>(Init)) {
SourceRange Parens = VIE->getSourceRange();
return getDerived().RebuildParenListExpr(Parens.getBegin(), None,
Parens.getEnd());
}
// FIXME: We shouldn't build ImplicitValueInitExprs for direct-initialization.
if (isa<ImplicitValueInitExpr>(Init))
return getDerived().RebuildParenListExpr(SourceLocation(), None,
SourceLocation());
// Revert initialization by constructor back to a parenthesized or braced list
// of expressions. Any other form of initializer can just be reused directly.
if (!Construct || isa<CXXTemporaryObjectExpr>(Construct))
return getDerived().TransformExpr(Init);
// If the initialization implicitly converted an initializer list to a
// std::initializer_list object, unwrap the std::initializer_list too.
if (Construct && Construct->isStdInitListInitialization())
return TransformInitializer(Construct->getArg(0), NotCopyInit);
// Enter a list-init context if this was list initialization.
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
Construct->isListInitialization());
SmallVector<Expr*, 8> NewArgs;
bool ArgChanged = false;
if (getDerived().TransformExprs(Construct->getArgs(), Construct->getNumArgs(),
/*IsCall*/true, NewArgs, &ArgChanged))
return ExprError();
// If this was list initialization, revert to syntactic list form.
if (Construct->isListInitialization())
return getDerived().RebuildInitList(Construct->getBeginLoc(), NewArgs,
Construct->getEndLoc());
// Build a ParenListExpr to represent anything else.
SourceRange Parens = Construct->getParenOrBraceRange();
if (Parens.isInvalid()) {
// This was a variable declaration's initialization for which no initializer
// was specified.
assert(NewArgs.empty() &&
"no parens or braces but have direct init with arguments?");
return ExprEmpty();
}
return getDerived().RebuildParenListExpr(Parens.getBegin(), NewArgs,
Parens.getEnd());
}
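// Roughly, the reversal above means that a direct-initializer Sema has
// already analyzed into a CXXConstructExpr, e.g. the stored form of
//
//   X x(a, b);
//
// is rebuilt as its written form: a ParenListExpr holding the transformed
// 'a' and 'b' (or an InitListExpr for braced initialization), so that
// initialization semantics -- including constructor overload resolution --
// are recomputed for the transformed type and arguments.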
template<typename Derived>
bool TreeTransform<Derived>::TransformExprs(Expr *const *Inputs,
unsigned NumInputs,
bool IsCall,
SmallVectorImpl<Expr *> &Outputs,
bool *ArgChanged) {
for (unsigned I = 0; I != NumInputs; ++I) {
// If requested, drop call arguments that need to be dropped.
if (IsCall && getDerived().DropCallArgument(Inputs[I])) {
if (ArgChanged)
*ArgChanged = true;
break;
}
if (PackExpansionExpr *Expansion = dyn_cast<PackExpansionExpr>(Inputs[I])) {
Expr *Pattern = Expansion->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = Expansion->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(Expansion->getEllipsisLoc(),
Pattern->getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult OutPattern = getDerived().TransformExpr(Pattern);
if (OutPattern.isInvalid())
return true;
ExprResult Out = getDerived().RebuildPackExpansion(OutPattern.get(),
Expansion->getEllipsisLoc(),
NumExpansions);
if (Out.isInvalid())
return true;
if (ArgChanged)
*ArgChanged = true;
Outputs.push_back(Out.get());
continue;
}
// Record right away that the argument was changed. This needs
// to happen even if the array expands to nothing.
if (ArgChanged) *ArgChanged = true;
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
if (Out.get()->containsUnexpandedParameterPack()) {
Out = getDerived().RebuildPackExpansion(
Out.get(), Expansion->getEllipsisLoc(), OrigNumExpansions);
if (Out.isInvalid())
return true;
}
Outputs.push_back(Out.get());
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Out = getDerived().RebuildPackExpansion(
Out.get(), Expansion->getEllipsisLoc(), OrigNumExpansions);
if (Out.isInvalid())
return true;
Outputs.push_back(Out.get());
}
continue;
}
ExprResult Result =
IsCall ? getDerived().TransformInitializer(Inputs[I], /*DirectInit*/false)
: getDerived().TransformExpr(Inputs[I]);
if (Result.isInvalid())
return true;
if (Result.get() != Inputs[I] && ArgChanged)
*ArgChanged = true;
Outputs.push_back(Result.get());
}
return false;
}
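// For example, when transforming a call argument such as
//
//   g(args...)        // 'args' is a function parameter pack
//
// whose pack is known to have three elements, TryExpandParameterPacks
// reports Expand == true with NumExpansions == 3, and the loop above
// transforms the pattern three times with the substitution index set to
// 0, 1 and 2, pushing one transformed argument per element into Outputs.
// When the pack cannot be expanded yet (e.g. during a partial substitution),
// the pattern is transformed once and wrapped back into a PackExpansionExpr
// instead.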
template <typename Derived>
Sema::ConditionResult TreeTransform<Derived>::TransformCondition(
SourceLocation Loc, VarDecl *Var, Expr *Expr, Sema::ConditionKind Kind) {
if (Var) {
VarDecl *ConditionVar = cast_or_null<VarDecl>(
getDerived().TransformDefinition(Var->getLocation(), Var));
if (!ConditionVar)
return Sema::ConditionError();
return getSema().ActOnConditionVariable(ConditionVar, Loc, Kind);
}
if (Expr) {
ExprResult CondExpr = getDerived().TransformExpr(Expr);
if (CondExpr.isInvalid())
return Sema::ConditionError();
return getSema().ActOnCondition(nullptr, Loc, CondExpr.get(), Kind);
}
return Sema::ConditionResult();
}
template <typename Derived>
NestedNameSpecifierLoc TreeTransform<Derived>::TransformNestedNameSpecifierLoc(
NestedNameSpecifierLoc NNS, QualType ObjectType,
NamedDecl *FirstQualifierInScope) {
SmallVector<NestedNameSpecifierLoc, 4> Qualifiers;
for (NestedNameSpecifierLoc Qualifier = NNS; Qualifier;
Qualifier = Qualifier.getPrefix())
Qualifiers.push_back(Qualifier);
CXXScopeSpec SS;
while (!Qualifiers.empty()) {
NestedNameSpecifierLoc Q = Qualifiers.pop_back_val();
NestedNameSpecifier *QNNS = Q.getNestedNameSpecifier();
switch (QNNS->getKind()) {
case NestedNameSpecifier::Identifier: {
Sema::NestedNameSpecInfo IdInfo(QNNS->getAsIdentifier(),
Q.getLocalBeginLoc(), Q.getLocalEndLoc(),
ObjectType);
if (SemaRef.BuildCXXNestedNameSpecifier(/*Scope=*/nullptr, IdInfo, false,
SS, FirstQualifierInScope, false))
return NestedNameSpecifierLoc();
break;
}
case NestedNameSpecifier::Namespace: {
NamespaceDecl *NS =
cast_or_null<NamespaceDecl>(getDerived().TransformDecl(
Q.getLocalBeginLoc(), QNNS->getAsNamespace()));
SS.Extend(SemaRef.Context, NS, Q.getLocalBeginLoc(), Q.getLocalEndLoc());
break;
}
case NestedNameSpecifier::NamespaceAlias: {
NamespaceAliasDecl *Alias =
cast_or_null<NamespaceAliasDecl>(getDerived().TransformDecl(
Q.getLocalBeginLoc(), QNNS->getAsNamespaceAlias()));
SS.Extend(SemaRef.Context, Alias, Q.getLocalBeginLoc(),
Q.getLocalEndLoc());
break;
}
case NestedNameSpecifier::Global:
// There is no meaningful transformation that one could perform on the
// global scope.
SS.MakeGlobal(SemaRef.Context, Q.getBeginLoc());
break;
case NestedNameSpecifier::Super: {
CXXRecordDecl *RD =
cast_or_null<CXXRecordDecl>(getDerived().TransformDecl(
SourceLocation(), QNNS->getAsRecordDecl()));
SS.MakeSuper(SemaRef.Context, RD, Q.getBeginLoc(), Q.getEndLoc());
break;
}
case NestedNameSpecifier::TypeSpecWithTemplate:
case NestedNameSpecifier::TypeSpec: {
TypeLoc TL = TransformTypeInObjectScope(Q.getTypeLoc(), ObjectType,
FirstQualifierInScope, SS);
if (!TL)
return NestedNameSpecifierLoc();
if (TL.getType()->isDependentType() || TL.getType()->isRecordType() ||
(SemaRef.getLangOpts().CPlusPlus11 &&
TL.getType()->isEnumeralType())) {
assert(!TL.getType().hasLocalQualifiers() &&
"Can't get cv-qualifiers here");
if (TL.getType()->isEnumeralType())
SemaRef.Diag(TL.getBeginLoc(),
diag::warn_cxx98_compat_enum_nested_name_spec);
SS.Extend(SemaRef.Context, /*FIXME:*/ SourceLocation(), TL,
Q.getLocalEndLoc());
break;
}
// If the nested-name-specifier is an invalid typedef, don't emit an
// error because a previous error should have already been emitted.
TypedefTypeLoc TTL = TL.getAs<TypedefTypeLoc>();
if (!TTL || !TTL.getTypedefNameDecl()->isInvalidDecl()) {
SemaRef.Diag(TL.getBeginLoc(), diag::err_nested_name_spec_non_tag)
<< TL.getType() << SS.getRange();
}
return NestedNameSpecifierLoc();
}
}
// The qualifier-in-scope and object type only apply to the leftmost entity.
FirstQualifierInScope = nullptr;
ObjectType = QualType();
}
// Don't rebuild the nested-name-specifier if we don't have to.
if (SS.getScopeRep() == NNS.getNestedNameSpecifier() &&
!getDerived().AlwaysRebuild())
return NNS;
// If we can re-use the source-location data from the original
// nested-name-specifier, do so.
if (SS.location_size() == NNS.getDataLength() &&
memcmp(SS.location_data(), NNS.getOpaqueData(), SS.location_size()) == 0)
return NestedNameSpecifierLoc(SS.getScopeRep(), NNS.getOpaqueData());
// Allocate new nested-name-specifier location information.
return SS.getWithLocInContext(SemaRef.Context);
}
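// The prefix walk above effectively rebuilds a qualifier such as
//
//   A::B<T>::type::
//
// from left to right: each prefix is pushed onto 'Qualifiers' (leftmost
// component last) and then popped, so 'A' is handled before 'B<T>', which is
// handled before 'type'. Only the leftmost component may consult the object
// type and first-qualifier-in-scope information, which is why both are
// cleared at the bottom of the loop.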
template<typename Derived>
DeclarationNameInfo
TreeTransform<Derived>
::TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo) {
DeclarationName Name = NameInfo.getName();
if (!Name)
return DeclarationNameInfo();
switch (Name.getNameKind()) {
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXOperatorName:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXUsingDirective:
return NameInfo;
case DeclarationName::CXXDeductionGuideName: {
TemplateDecl *OldTemplate = Name.getCXXDeductionGuideTemplate();
TemplateDecl *NewTemplate = cast_or_null<TemplateDecl>(
getDerived().TransformDecl(NameInfo.getLoc(), OldTemplate));
if (!NewTemplate)
return DeclarationNameInfo();
DeclarationNameInfo NewNameInfo(NameInfo);
NewNameInfo.setName(
SemaRef.Context.DeclarationNames.getCXXDeductionGuideName(NewTemplate));
return NewNameInfo;
}
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName: {
TypeSourceInfo *NewTInfo;
CanQualType NewCanTy;
if (TypeSourceInfo *OldTInfo = NameInfo.getNamedTypeInfo()) {
NewTInfo = getDerived().TransformType(OldTInfo);
if (!NewTInfo)
return DeclarationNameInfo();
NewCanTy = SemaRef.Context.getCanonicalType(NewTInfo->getType());
}
else {
NewTInfo = nullptr;
TemporaryBase Rebase(*this, NameInfo.getLoc(), Name);
QualType NewT = getDerived().TransformType(Name.getCXXNameType());
if (NewT.isNull())
return DeclarationNameInfo();
NewCanTy = SemaRef.Context.getCanonicalType(NewT);
}
DeclarationName NewName
= SemaRef.Context.DeclarationNames.getCXXSpecialName(Name.getNameKind(),
NewCanTy);
DeclarationNameInfo NewNameInfo(NameInfo);
NewNameInfo.setName(NewName);
NewNameInfo.setNamedTypeInfo(NewTInfo);
return NewNameInfo;
}
}
llvm_unreachable("Unknown name kind.");
}
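// Constructor, destructor and conversion-function names are the interesting
// cases here because the name itself embeds a type: for example, the
// destructor name in
//
//   t.~T();
//
// names the type 'T'. Transforming such a name therefore means transforming
// the embedded type and rebuilding the special name from the canonical form
// of the result (with its TypeSourceInfo, when one was written).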
template<typename Derived>
TemplateName
TreeTransform<Derived>::TransformTemplateName(CXXScopeSpec &SS,
TemplateName Name,
SourceLocation NameLoc,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName) {
if (QualifiedTemplateName *QTN = Name.getAsQualifiedTemplateName()) {
TemplateDecl *Template = QTN->getTemplateDecl();
assert(Template && "qualified template name must refer to a template");
TemplateDecl *TransTemplate
= cast_or_null<TemplateDecl>(getDerived().TransformDecl(NameLoc,
Template));
if (!TransTemplate)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
SS.getScopeRep() == QTN->getQualifier() &&
TransTemplate == Template)
return Name;
return getDerived().RebuildTemplateName(SS, QTN->hasTemplateKeyword(),
TransTemplate);
}
if (DependentTemplateName *DTN = Name.getAsDependentTemplateName()) {
if (SS.getScopeRep()) {
// These apply to the scope specifier, not the template.
ObjectType = QualType();
FirstQualifierInScope = nullptr;
}
if (!getDerived().AlwaysRebuild() &&
SS.getScopeRep() == DTN->getQualifier() &&
ObjectType.isNull())
return Name;
// FIXME: Preserve the location of the "template" keyword.
SourceLocation TemplateKWLoc = NameLoc;
if (DTN->isIdentifier()) {
return getDerived().RebuildTemplateName(SS,
TemplateKWLoc,
*DTN->getIdentifier(),
NameLoc,
ObjectType,
FirstQualifierInScope,
AllowInjectedClassName);
}
return getDerived().RebuildTemplateName(SS, TemplateKWLoc,
DTN->getOperator(), NameLoc,
ObjectType, AllowInjectedClassName);
}
if (TemplateDecl *Template = Name.getAsTemplateDecl()) {
TemplateDecl *TransTemplate
= cast_or_null<TemplateDecl>(getDerived().TransformDecl(NameLoc,
Template));
if (!TransTemplate)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
TransTemplate == Template)
return Name;
return TemplateName(TransTemplate);
}
if (SubstTemplateTemplateParmPackStorage *SubstPack
= Name.getAsSubstTemplateTemplateParmPack()) {
TemplateTemplateParmDecl *TransParam
= cast_or_null<TemplateTemplateParmDecl>(
getDerived().TransformDecl(NameLoc, SubstPack->getParameterPack()));
if (!TransParam)
return TemplateName();
if (!getDerived().AlwaysRebuild() &&
TransParam == SubstPack->getParameterPack())
return Name;
return getDerived().RebuildTemplateName(TransParam,
SubstPack->getArgumentPack());
}
// These should be getting filtered out before they reach the AST.
llvm_unreachable("overloaded function decl survived to here");
}
template<typename Derived>
void TreeTransform<Derived>::InventTemplateArgumentLoc(
const TemplateArgument &Arg,
TemplateArgumentLoc &Output) {
Output = getSema().getTrivialTemplateArgumentLoc(
Arg, QualType(), getDerived().getBaseLocation());
}
template <typename Derived>
bool TreeTransform<Derived>::TransformTemplateArgument(
const TemplateArgumentLoc &Input, TemplateArgumentLoc &Output,
bool Uneval) {
const TemplateArgument &Arg = Input.getArgument();
switch (Arg.getKind()) {
case TemplateArgument::Null:
case TemplateArgument::Pack:
llvm_unreachable("Unexpected TemplateArgument");
case TemplateArgument::Integral:
case TemplateArgument::NullPtr:
case TemplateArgument::Declaration: {
// Transform a resolved template argument straight to a resolved template
// argument. We get here when substituting into an already-substituted
// template type argument during concept satisfaction checking.
QualType T = Arg.getNonTypeTemplateArgumentType();
QualType NewT = getDerived().TransformType(T);
if (NewT.isNull())
return true;
ValueDecl *D = Arg.getKind() == TemplateArgument::Declaration
? Arg.getAsDecl()
: nullptr;
ValueDecl *NewD = D ? cast_or_null<ValueDecl>(getDerived().TransformDecl(
getDerived().getBaseLocation(), D))
: nullptr;
if (D && !NewD)
return true;
if (NewT == T && D == NewD)
Output = Input;
else if (Arg.getKind() == TemplateArgument::Integral)
Output = TemplateArgumentLoc(
TemplateArgument(getSema().Context, Arg.getAsIntegral(), NewT),
TemplateArgumentLocInfo());
else if (Arg.getKind() == TemplateArgument::NullPtr)
Output = TemplateArgumentLoc(TemplateArgument(NewT, /*IsNullPtr=*/true),
TemplateArgumentLocInfo());
else
Output = TemplateArgumentLoc(TemplateArgument(NewD, NewT),
TemplateArgumentLocInfo());
return false;
}
case TemplateArgument::Type: {
TypeSourceInfo *DI = Input.getTypeSourceInfo();
if (!DI)
DI = InventTypeSourceInfo(Input.getArgument().getAsType());
DI = getDerived().TransformType(DI);
if (!DI)
return true;
Output = TemplateArgumentLoc(TemplateArgument(DI->getType()), DI);
return false;
}
case TemplateArgument::Template: {
NestedNameSpecifierLoc QualifierLoc = Input.getTemplateQualifierLoc();
if (QualifierLoc) {
QualifierLoc = getDerived().TransformNestedNameSpecifierLoc(QualifierLoc);
if (!QualifierLoc)
return true;
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
TemplateName Template = getDerived().TransformTemplateName(
SS, Arg.getAsTemplate(), Input.getTemplateNameLoc());
if (Template.isNull())
return true;
Output = TemplateArgumentLoc(SemaRef.Context, TemplateArgument(Template),
QualifierLoc, Input.getTemplateNameLoc());
return false;
}
case TemplateArgument::TemplateExpansion:
llvm_unreachable("Caller should expand pack expansions");
case TemplateArgument::Expression: {
// Template argument expressions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
getSema(),
Uneval ? Sema::ExpressionEvaluationContext::Unevaluated
: Sema::ExpressionEvaluationContext::ConstantEvaluated,
/*LambdaContextDecl=*/nullptr, /*ExprContext=*/
Sema::ExpressionEvaluationContextRecord::EK_TemplateArgument);
Expr *InputExpr = Input.getSourceExpression();
if (!InputExpr)
InputExpr = Input.getArgument().getAsExpr();
ExprResult E = getDerived().TransformExpr(InputExpr);
E = SemaRef.ActOnConstantExpression(E);
if (E.isInvalid())
return true;
Output = TemplateArgumentLoc(TemplateArgument(E.get()), E.get());
return false;
}
}
// Work around bogus GCC warning
return true;
}
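// Note on the Expression case above: the argument is transformed inside a
// constant-expression evaluation context (or an unevaluated one when the
// caller passes 'Uneval'), and the result goes through
// ActOnConstantExpression, so a substituted non-type argument such as
// 'N + 1' is re-checked as a constant expression in the instantiation.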
/// Iterator adaptor that invents template argument location information
/// for each of the template arguments in its underlying iterator.
template<typename Derived, typename InputIterator>
class TemplateArgumentLocInventIterator {
TreeTransform<Derived> &Self;
InputIterator Iter;
public:
typedef TemplateArgumentLoc value_type;
typedef TemplateArgumentLoc reference;
typedef typename std::iterator_traits<InputIterator>::difference_type
difference_type;
typedef std::input_iterator_tag iterator_category;
class pointer {
TemplateArgumentLoc Arg;
public:
explicit pointer(TemplateArgumentLoc Arg) : Arg(Arg) { }
const TemplateArgumentLoc *operator->() const { return &Arg; }
};
TemplateArgumentLocInventIterator() { }
explicit TemplateArgumentLocInventIterator(TreeTransform<Derived> &Self,
InputIterator Iter)
: Self(Self), Iter(Iter) { }
TemplateArgumentLocInventIterator &operator++() {
++Iter;
return *this;
}
TemplateArgumentLocInventIterator operator++(int) {
TemplateArgumentLocInventIterator Old(*this);
++(*this);
return Old;
}
reference operator*() const {
TemplateArgumentLoc Result;
Self.InventTemplateArgumentLoc(*Iter, Result);
return Result;
}
pointer operator->() const { return pointer(**this); }
friend bool operator==(const TemplateArgumentLocInventIterator &X,
const TemplateArgumentLocInventIterator &Y) {
return X.Iter == Y.Iter;
}
friend bool operator!=(const TemplateArgumentLocInventIterator &X,
const TemplateArgumentLocInventIterator &Y) {
return X.Iter != Y.Iter;
}
};
template<typename Derived>
template<typename InputIterator>
bool TreeTransform<Derived>::TransformTemplateArguments(
InputIterator First, InputIterator Last, TemplateArgumentListInfo &Outputs,
bool Uneval) {
for (; First != Last; ++First) {
TemplateArgumentLoc Out;
TemplateArgumentLoc In = *First;
if (In.getArgument().getKind() == TemplateArgument::Pack) {
// Unpack argument packs, translating them into separate arguments.
// FIXME: We could do much better if we could guarantee that the
// TemplateArgumentLocInfo for the pack expansion would be usable for
// all of the template arguments in the argument pack.
typedef TemplateArgumentLocInventIterator<Derived,
TemplateArgument::pack_iterator>
PackLocIterator;
if (TransformTemplateArguments(PackLocIterator(*this,
In.getArgument().pack_begin()),
PackLocIterator(*this,
In.getArgument().pack_end()),
Outputs, Uneval))
return true;
continue;
}
if (In.getArgument().isPackExpansion()) {
// We have a pack expansion, for which we will be substituting into
// the pattern.
SourceLocation Ellipsis;
Optional<unsigned> OrigNumExpansions;
TemplateArgumentLoc Pattern
= getSema().getTemplateArgumentPackExpansionPattern(
In, Ellipsis, OrigNumExpansions);
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(Ellipsis,
Pattern.getSourceRange(),
Unexpanded,
Expand,
RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
TemplateArgumentLoc OutPattern;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
if (getDerived().TransformTemplateArgument(Pattern, OutPattern, Uneval))
return true;
Out = getDerived().RebuildPackExpansion(OutPattern, Ellipsis,
NumExpansions);
if (Out.getArgument().isNull())
return true;
Outputs.addArgument(Out);
continue;
}
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
if (getDerived().TransformTemplateArgument(Pattern, Out, Uneval))
return true;
if (Out.getArgument().containsUnexpandedParameterPack()) {
Out = getDerived().RebuildPackExpansion(Out, Ellipsis,
OrigNumExpansions);
if (Out.getArgument().isNull())
return true;
}
Outputs.addArgument(Out);
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
if (getDerived().TransformTemplateArgument(Pattern, Out, Uneval))
return true;
Out = getDerived().RebuildPackExpansion(Out, Ellipsis,
OrigNumExpansions);
if (Out.getArgument().isNull())
return true;
Outputs.addArgument(Out);
}
continue;
}
// The simple case:
if (getDerived().TransformTemplateArgument(In, Out, Uneval))
return true;
Outputs.addArgument(Out);
}
return false;
}
//===----------------------------------------------------------------------===//
// Type transformation
//===----------------------------------------------------------------------===//
template<typename Derived>
QualType TreeTransform<Derived>::TransformType(QualType T) {
if (getDerived().AlreadyTransformed(T))
return T;
// Temporary workaround. All of these transformations should
// eventually turn into transformations on TypeLocs.
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
TypeSourceInfo *NewDI = getDerived().TransformType(DI);
if (!NewDI)
return QualType();
return NewDI->getType();
}
template<typename Derived>
TypeSourceInfo *TreeTransform<Derived>::TransformType(TypeSourceInfo *DI) {
// Refine the base location to the type's location.
TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
getDerived().getBaseEntity());
if (getDerived().AlreadyTransformed(DI->getType()))
return DI;
TypeLocBuilder TLB;
TypeLoc TL = DI->getTypeLoc();
TLB.reserve(TL.getFullDataSize());
QualType Result = getDerived().TransformType(TLB, TL);
if (Result.isNull())
return nullptr;
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformType(TypeLocBuilder &TLB, TypeLoc T) {
switch (T.getTypeLocClass()) {
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
case TypeLoc::CLASS: \
return getDerived().Transform##CLASS##Type(TLB, \
T.castAs<CLASS##TypeLoc>());
#include "clang/AST/TypeLocNodes.def"
}
llvm_unreachable("unhandled type loc!");
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeWithDeducedTST(QualType T) {
if (!isa<DependentNameType>(T))
return TransformType(T);
if (getDerived().AlreadyTransformed(T))
return T;
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T,
getDerived().getBaseLocation());
TypeSourceInfo *NewDI = getDerived().TransformTypeWithDeducedTST(DI);
return NewDI ? NewDI->getType() : QualType();
}
template<typename Derived>
TypeSourceInfo *
TreeTransform<Derived>::TransformTypeWithDeducedTST(TypeSourceInfo *DI) {
if (!isa<DependentNameType>(DI->getType()))
return TransformType(DI);
// Refine the base location to the type's location.
TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(),
getDerived().getBaseEntity());
if (getDerived().AlreadyTransformed(DI->getType()))
return DI;
TypeLocBuilder TLB;
TypeLoc TL = DI->getTypeLoc();
TLB.reserve(TL.getFullDataSize());
auto QTL = TL.getAs<QualifiedTypeLoc>();
if (QTL)
TL = QTL.getUnqualifiedLoc();
auto DNTL = TL.castAs<DependentNameTypeLoc>();
QualType Result = getDerived().TransformDependentNameType(
TLB, DNTL, /*DeducedTSTContext*/true);
if (Result.isNull())
return nullptr;
if (QTL) {
Result = getDerived().RebuildQualifiedType(Result, QTL);
if (Result.isNull())
return nullptr;
TLB.TypeWasModifiedSafely(Result);
}
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformQualifiedType(TypeLocBuilder &TLB,
QualifiedTypeLoc T) {
QualType Result = getDerived().TransformType(TLB, T.getUnqualifiedLoc());
if (Result.isNull())
return QualType();
Result = getDerived().RebuildQualifiedType(Result, T);
if (Result.isNull())
return QualType();
// RebuildQualifiedType might have updated the type, but not in a way
// that invalidates the TypeLoc. (There's no location information for
// qualifiers.)
TLB.TypeWasModifiedSafely(Result);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildQualifiedType(QualType T,
QualifiedTypeLoc TL) {
SourceLocation Loc = TL.getBeginLoc();
Qualifiers Quals = TL.getType().getLocalQualifiers();
if (((T.getAddressSpace() != LangAS::Default &&
Quals.getAddressSpace() != LangAS::Default)) &&
T.getAddressSpace() != Quals.getAddressSpace()) {
SemaRef.Diag(Loc, diag::err_address_space_mismatch_templ_inst)
<< TL.getType() << T;
return QualType();
}
// C++ [dcl.fct]p7:
// [When] adding cv-qualifications on top of the function type [...] the
// cv-qualifiers are ignored.
if (T->isFunctionType()) {
T = SemaRef.getASTContext().getAddrSpaceQualType(T,
Quals.getAddressSpace());
return T;
}
// C++ [dcl.ref]p1:
// when the cv-qualifiers are introduced through the use of a typedef-name
// or decltype-specifier [...] the cv-qualifiers are ignored.
// Note that [dcl.ref]p1 lists all cases in which cv-qualifiers can be
// applied to a reference type.
if (T->isReferenceType()) {
// The only qualifier that applies to a reference type is restrict.
if (!Quals.hasRestrict())
return T;
Quals = Qualifiers::fromCVRMask(Qualifiers::Restrict);
}
// Suppress Objective-C lifetime qualifiers if they don't make sense for the
// resulting type.
if (Quals.hasObjCLifetime()) {
if (!T->isObjCLifetimeType() && !T->isDependentType())
Quals.removeObjCLifetime();
else if (T.getObjCLifetime()) {
// Objective-C ARC:
// A lifetime qualifier applied to a substituted template parameter
// overrides the lifetime qualifier from the template argument.
const AutoType *AutoTy;
if (const SubstTemplateTypeParmType *SubstTypeParam
= dyn_cast<SubstTemplateTypeParmType>(T)) {
QualType Replacement = SubstTypeParam->getReplacementType();
Qualifiers Qs = Replacement.getQualifiers();
Qs.removeObjCLifetime();
Replacement = SemaRef.Context.getQualifiedType(
Replacement.getUnqualifiedType(), Qs);
T = SemaRef.Context.getSubstTemplateTypeParmType(
SubstTypeParam->getReplacedParameter(), Replacement);
} else if ((AutoTy = dyn_cast<AutoType>(T)) && AutoTy->isDeduced()) {
// 'auto' types behave the same way as template parameters.
QualType Deduced = AutoTy->getDeducedType();
Qualifiers Qs = Deduced.getQualifiers();
Qs.removeObjCLifetime();
Deduced =
SemaRef.Context.getQualifiedType(Deduced.getUnqualifiedType(), Qs);
T = SemaRef.Context.getAutoType(Deduced, AutoTy->getKeyword(),
AutoTy->isDependentType(),
/*isPack=*/false,
AutoTy->getTypeConstraintConcept(),
AutoTy->getTypeConstraintArguments());
} else {
// Otherwise, complain about the addition of a qualifier to an
// already-qualified type.
// FIXME: Why is this check not in Sema::BuildQualifiedType?
SemaRef.Diag(Loc, diag::err_attr_objc_ownership_redundant) << T;
Quals.removeObjCLifetime();
}
}
}
return SemaRef.BuildQualifiedType(T, Loc, Quals);
}
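// Two small illustrations of the qualifier-dropping rules applied above,
// per [dcl.fct]p7 and [dcl.ref]p1:
//
//   template <typename T> void f(const T);
//   // T = int&: 'const' added on top of a reference is ignored, so the
//   // parameter type remains int&.
//
//   template <typename T> using C = const T;
//   using FT = C<void()>;   // cv-qualifiers on a function type are ignored
//
// Only 'restrict' is kept on a reference type, and Objective-C lifetime
// qualifiers are removed when the substituted type cannot carry one, or
// diagnosed as redundant when it already has one.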
template<typename Derived>
TypeLoc
TreeTransform<Derived>::TransformTypeInObjectScope(TypeLoc TL,
QualType ObjectType,
NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
if (getDerived().AlreadyTransformed(TL.getType()))
return TL;
TypeSourceInfo *TSI =
TransformTSIInObjectScope(TL, ObjectType, UnqualLookup, SS);
if (TSI)
return TSI->getTypeLoc();
return TypeLoc();
}
template<typename Derived>
TypeSourceInfo *
TreeTransform<Derived>::TransformTypeInObjectScope(TypeSourceInfo *TSInfo,
QualType ObjectType,
NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
if (getDerived().AlreadyTransformed(TSInfo->getType()))
return TSInfo;
return TransformTSIInObjectScope(TSInfo->getTypeLoc(), ObjectType,
UnqualLookup, SS);
}
template <typename Derived>
TypeSourceInfo *TreeTransform<Derived>::TransformTSIInObjectScope(
TypeLoc TL, QualType ObjectType, NamedDecl *UnqualLookup,
CXXScopeSpec &SS) {
QualType T = TL.getType();
assert(!getDerived().AlreadyTransformed(T));
TypeLocBuilder TLB;
QualType Result;
if (isa<TemplateSpecializationType>(T)) {
TemplateSpecializationTypeLoc SpecTL =
TL.castAs<TemplateSpecializationTypeLoc>();
TemplateName Template = getDerived().TransformTemplateName(
SS, SpecTL.getTypePtr()->getTemplateName(), SpecTL.getTemplateNameLoc(),
ObjectType, UnqualLookup, /*AllowInjectedClassName*/true);
if (Template.isNull())
return nullptr;
Result = getDerived().TransformTemplateSpecializationType(TLB, SpecTL,
Template);
} else if (isa<DependentTemplateSpecializationType>(T)) {
DependentTemplateSpecializationTypeLoc SpecTL =
TL.castAs<DependentTemplateSpecializationTypeLoc>();
TemplateName Template
= getDerived().RebuildTemplateName(SS,
SpecTL.getTemplateKeywordLoc(),
*SpecTL.getTypePtr()->getIdentifier(),
SpecTL.getTemplateNameLoc(),
ObjectType, UnqualLookup,
/*AllowInjectedClassName*/true);
if (Template.isNull())
return nullptr;
Result = getDerived().TransformDependentTemplateSpecializationType(TLB,
SpecTL,
Template,
SS);
} else {
// Nothing special needs to be done for these.
Result = getDerived().TransformType(TLB, TL);
}
if (Result.isNull())
return nullptr;
return TLB.getTypeSourceInfo(SemaRef.Context, Result);
}
template <class TyLoc> static inline
QualType TransformTypeSpecType(TypeLocBuilder &TLB, TyLoc T) {
TyLoc NewT = TLB.push<TyLoc>(T.getType());
NewT.setNameLoc(T.getNameLoc());
return T.getType();
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformBuiltinType(TypeLocBuilder &TLB,
BuiltinTypeLoc T) {
BuiltinTypeLoc NewT = TLB.push<BuiltinTypeLoc>(T.getType());
NewT.setBuiltinLoc(T.getBuiltinLoc());
if (T.needsExtraLocalData())
NewT.getWrittenBuiltinSpecs() = T.getWrittenBuiltinSpecs();
return T.getType();
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformComplexType(TypeLocBuilder &TLB,
ComplexTypeLoc T) {
// FIXME: recurse?
return TransformTypeSpecType(TLB, T);
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformAdjustedType(TypeLocBuilder &TLB,
AdjustedTypeLoc TL) {
// Adjustments applied during transformation are handled elsewhere.
return getDerived().TransformType(TLB, TL.getOriginalLoc());
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDecayedType(TypeLocBuilder &TLB,
DecayedTypeLoc TL) {
QualType OriginalType = getDerived().TransformType(TLB, TL.getOriginalLoc());
if (OriginalType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
OriginalType != TL.getOriginalLoc().getType())
Result = SemaRef.Context.getDecayedType(OriginalType);
TLB.push<DecayedTypeLoc>(Result);
// Nothing to set for DecayedTypeLoc.
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformPointerType(TypeLocBuilder &TLB,
PointerTypeLoc TL) {
QualType PointeeType
= getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (PointeeType->getAs<ObjCObjectType>()) {
// A dependent pointer type 'T *' is being transformed such
// that an Objective-C class type is being substituted for 'T'. The
// resulting pointer type is an ObjCObjectPointerType, not a
// PointerType.
Result = SemaRef.Context.getObjCObjectPointerType(PointeeType);
ObjCObjectPointerTypeLoc NewT = TLB.push<ObjCObjectPointerTypeLoc>(Result);
NewT.setStarLoc(TL.getStarLoc());
return Result;
}
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildPointerType(PointeeType, TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
// Objective-C ARC can add lifetime qualifiers to the type that we're
// pointing to.
TLB.TypeWasModifiedSafely(Result->getPointeeType());
PointerTypeLoc NewT = TLB.push<PointerTypeLoc>(Result);
NewT.setSigilLoc(TL.getSigilLoc());
return Result;
}
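// Example of the Objective-C special case above: if 'T *' was written in a
// template and 'T' is substituted with an Objective-C class type such as
// NSString, the rebuilt type must be 'NSString *', which Clang models as an
// ObjCObjectPointerType rather than a PointerType, so the TypeLoc pushed
// onto the builder has to use that node kind as well.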
template<typename Derived>
QualType
TreeTransform<Derived>::TransformBlockPointerType(TypeLocBuilder &TLB,
BlockPointerTypeLoc TL) {
QualType PointeeType
= getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildBlockPointerType(PointeeType,
TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
BlockPointerTypeLoc NewT = TLB.push<BlockPointerTypeLoc>(Result);
NewT.setSigilLoc(TL.getSigilLoc());
return Result;
}
/// Transforms a reference type. Note that somewhat paradoxically we
/// don't care whether the type itself is an l-value type or an r-value
/// type; we only care if the type was *written* as an l-value type
/// or an r-value type.
template<typename Derived>
QualType
TreeTransform<Derived>::TransformReferenceType(TypeLocBuilder &TLB,
ReferenceTypeLoc TL) {
const ReferenceType *T = TL.getTypePtr();
// Note that this works with the pointee-as-written.
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != T->getPointeeTypeAsWritten()) {
Result = getDerived().RebuildReferenceType(PointeeType,
T->isSpelledAsLValue(),
TL.getSigilLoc());
if (Result.isNull())
return QualType();
}
// Objective-C ARC can add lifetime qualifiers to the type that we're
// referring to.
TLB.TypeWasModifiedSafely(
Result->castAs<ReferenceType>()->getPointeeTypeAsWritten());
// r-value references can be rebuilt as l-value references.
ReferenceTypeLoc NewTL;
if (isa<LValueReferenceType>(Result))
NewTL = TLB.push<LValueReferenceTypeLoc>(Result);
else
NewTL = TLB.push<RValueReferenceTypeLoc>(Result);
NewTL.setSigilLoc(TL.getSigilLoc());
return Result;
}
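// The "r-value references can be rebuilt as l-value references" note covers
// reference collapsing: transforming the written type 'T &&' with 'T'
// substituted to 'int &' yields 'int &', so the TypeLoc pushed here may be an
// LValueReferenceTypeLoc even though the source spelled an r-value reference.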
template<typename Derived>
QualType
TreeTransform<Derived>::TransformLValueReferenceType(TypeLocBuilder &TLB,
LValueReferenceTypeLoc TL) {
return TransformReferenceType(TLB, TL);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformRValueReferenceType(TypeLocBuilder &TLB,
RValueReferenceTypeLoc TL) {
return TransformReferenceType(TLB, TL);
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformMemberPointerType(TypeLocBuilder &TLB,
MemberPointerTypeLoc TL) {
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
TypeSourceInfo* OldClsTInfo = TL.getClassTInfo();
TypeSourceInfo *NewClsTInfo = nullptr;
if (OldClsTInfo) {
NewClsTInfo = getDerived().TransformType(OldClsTInfo);
if (!NewClsTInfo)
return QualType();
}
const MemberPointerType *T = TL.getTypePtr();
QualType OldClsType = QualType(T->getClass(), 0);
QualType NewClsType;
if (NewClsTInfo)
NewClsType = NewClsTInfo->getType();
else {
NewClsType = getDerived().TransformType(OldClsType);
if (NewClsType.isNull())
return QualType();
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != T->getPointeeType() ||
NewClsType != OldClsType) {
Result = getDerived().RebuildMemberPointerType(PointeeType, NewClsType,
TL.getStarLoc());
if (Result.isNull())
return QualType();
}
// If we had to adjust the pointee type when building a member pointer, make
// sure to push TypeLoc info for it.
const MemberPointerType *MPT = Result->getAs<MemberPointerType>();
if (MPT && PointeeType != MPT->getPointeeType()) {
assert(isa<AdjustedType>(MPT->getPointeeType()));
TLB.push<AdjustedTypeLoc>(MPT->getPointeeType());
}
MemberPointerTypeLoc NewTL = TLB.push<MemberPointerTypeLoc>(Result);
NewTL.setSigilLoc(TL.getSigilLoc());
NewTL.setClassTInfo(NewClsTInfo);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformConstantArrayType(TypeLocBuilder &TLB,
ConstantArrayTypeLoc TL) {
const ConstantArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Prefer the expression from the TypeLoc; the other may have been uniqued.
Expr *OldSize = TL.getSizeExpr();
if (!OldSize)
OldSize = const_cast<Expr*>(T->getSizeExpr());
Expr *NewSize = nullptr;
if (OldSize) {
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
NewSize = getDerived().TransformExpr(OldSize).template getAs<Expr>();
NewSize = SemaRef.ActOnConstantExpression(NewSize).get();
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
(T->getSizeExpr() && NewSize != OldSize)) {
Result = getDerived().RebuildConstantArrayType(ElementType,
T->getSizeModifier(),
T->getSize(), NewSize,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have either a ConstantArrayType or a VariableArrayType now:
// a ConstantArrayType is allowed to have an element type which is a
// VariableArrayType if the type is dependent. Fortunately, all array
// types have the same location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(NewSize);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformIncompleteArrayType(
TypeLocBuilder &TLB,
IncompleteArrayTypeLoc TL) {
const IncompleteArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildIncompleteArrayType(ElementType,
T->getSizeModifier(),
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
IncompleteArrayTypeLoc NewTL = TLB.push<IncompleteArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(nullptr);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformVariableArrayType(TypeLocBuilder &TLB,
VariableArrayTypeLoc TL) {
const VariableArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
ExprResult SizeResult;
{
EnterExpressionEvaluationContext Context(
SemaRef, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
SizeResult = getDerived().TransformExpr(T->getSizeExpr());
}
if (SizeResult.isInvalid())
return QualType();
SizeResult =
SemaRef.ActOnFinishFullExpr(SizeResult.get(), /*DiscardedValue*/ false);
if (SizeResult.isInvalid())
return QualType();
Expr *Size = SizeResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
Size != T->getSizeExpr()) {
Result = getDerived().RebuildVariableArrayType(ElementType,
T->getSizeModifier(),
Size,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have a constant-size array now, but fortunately it has the same
// location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(Size);
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformDependentSizedArrayType(TypeLocBuilder &TLB,
DependentSizedArrayTypeLoc TL) {
const DependentSizedArrayType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Array bounds are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
// Prefer the expression from the TypeLoc; the other may have been uniqued.
Expr *origSize = TL.getSizeExpr();
if (!origSize) origSize = T->getSizeExpr();
ExprResult sizeResult
= getDerived().TransformExpr(origSize);
sizeResult = SemaRef.ActOnConstantExpression(sizeResult);
if (sizeResult.isInvalid())
return QualType();
Expr *size = sizeResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
size != origSize) {
Result = getDerived().RebuildDependentSizedArrayType(ElementType,
T->getSizeModifier(),
size,
T->getIndexTypeCVRQualifiers(),
TL.getBracketsRange());
if (Result.isNull())
return QualType();
}
// We might have any sort of array type now, but fortunately they
// all have the same location layout.
ArrayTypeLoc NewTL = TLB.push<ArrayTypeLoc>(Result);
NewTL.setLBracketLoc(TL.getLBracketLoc());
NewTL.setRBracketLoc(TL.getRBracketLoc());
NewTL.setSizeExpr(size);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentVectorType(
TypeLocBuilder &TLB, DependentVectorTypeLoc TL) {
const DependentVectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Size = getDerived().TransformExpr(T->getSizeExpr());
Size = SemaRef.ActOnConstantExpression(Size);
if (Size.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType() ||
Size.get() != T->getSizeExpr()) {
Result = getDerived().RebuildDependentVectorType(
ElementType, Size.get(), T->getAttributeLoc(), T->getVectorKind());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentVectorType>(Result)) {
DependentVectorTypeLoc NewTL =
TLB.push<DependentVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
VectorTypeLoc NewTL = TLB.push<VectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentSizedExtVectorType(
TypeLocBuilder &TLB,
DependentSizedExtVectorTypeLoc TL) {
const DependentSizedExtVectorType *T = TL.getTypePtr();
// FIXME: ext vector locs should be nested
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
// Vector sizes are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Size = getDerived().TransformExpr(T->getSizeExpr());
Size = SemaRef.ActOnConstantExpression(Size);
if (Size.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType() ||
Size.get() != T->getSizeExpr()) {
Result = getDerived().RebuildDependentSizedExtVectorType(ElementType,
Size.get(),
T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentSizedExtVectorType>(Result)) {
DependentSizedExtVectorTypeLoc NewTL
= TLB.push<DependentSizedExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
ExtVectorTypeLoc NewTL = TLB.push<ExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template <typename Derived>
QualType
TreeTransform<Derived>::TransformConstantMatrixType(TypeLocBuilder &TLB,
ConstantMatrixTypeLoc TL) {
const ConstantMatrixType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(T->getElementType());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType()) {
Result = getDerived().RebuildConstantMatrixType(
ElementType, T->getNumRows(), T->getNumColumns());
if (Result.isNull())
return QualType();
}
ConstantMatrixTypeLoc NewTL = TLB.push<ConstantMatrixTypeLoc>(Result);
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrRowOperand(TL.getAttrRowOperand());
NewTL.setAttrColumnOperand(TL.getAttrColumnOperand());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentSizedMatrixType(
TypeLocBuilder &TLB, DependentSizedMatrixTypeLoc TL) {
const DependentSizedMatrixType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(T->getElementType());
if (ElementType.isNull()) {
return QualType();
}
// Matrix dimensions are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
Expr *origRows = TL.getAttrRowOperand();
if (!origRows)
origRows = T->getRowExpr();
Expr *origColumns = TL.getAttrColumnOperand();
if (!origColumns)
origColumns = T->getColumnExpr();
ExprResult rowResult = getDerived().TransformExpr(origRows);
rowResult = SemaRef.ActOnConstantExpression(rowResult);
if (rowResult.isInvalid())
return QualType();
ExprResult columnResult = getDerived().TransformExpr(origColumns);
columnResult = SemaRef.ActOnConstantExpression(columnResult);
if (columnResult.isInvalid())
return QualType();
Expr *rows = rowResult.get();
Expr *columns = columnResult.get();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ElementType != T->getElementType() ||
rows != origRows || columns != origColumns) {
Result = getDerived().RebuildDependentSizedMatrixType(
ElementType, rows, columns, T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// We might have any sort of matrix type now, but fortunately they
// all have the same location layout.
MatrixTypeLoc NewTL = TLB.push<MatrixTypeLoc>(Result);
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrRowOperand(rows);
NewTL.setAttrColumnOperand(columns);
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentAddressSpaceType(
TypeLocBuilder &TLB, DependentAddressSpaceTypeLoc TL) {
const DependentAddressSpaceType *T = TL.getTypePtr();
QualType pointeeType = getDerived().TransformType(T->getPointeeType());
if (pointeeType.isNull())
return QualType();
// Address spaces are constant expressions.
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult AddrSpace = getDerived().TransformExpr(T->getAddrSpaceExpr());
AddrSpace = SemaRef.ActOnConstantExpression(AddrSpace);
if (AddrSpace.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || pointeeType != T->getPointeeType() ||
AddrSpace.get() != T->getAddrSpaceExpr()) {
Result = getDerived().RebuildDependentAddressSpaceType(
pointeeType, AddrSpace.get(), T->getAttributeLoc());
if (Result.isNull())
return QualType();
}
// Result might be dependent or not.
if (isa<DependentAddressSpaceType>(Result)) {
DependentAddressSpaceTypeLoc NewTL =
TLB.push<DependentAddressSpaceTypeLoc>(Result);
NewTL.setAttrOperandParensRange(TL.getAttrOperandParensRange());
NewTL.setAttrExprOperand(TL.getAttrExprOperand());
NewTL.setAttrNameLoc(TL.getAttrNameLoc());
} else {
TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(
Result, getDerived().getBaseLocation());
TransformType(TLB, DI->getTypeLoc());
}
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformVectorType(TypeLocBuilder &TLB,
VectorTypeLoc TL) {
const VectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildVectorType(ElementType, T->getNumElements(),
T->getVectorKind());
if (Result.isNull())
return QualType();
}
VectorTypeLoc NewTL = TLB.push<VectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformExtVectorType(TypeLocBuilder &TLB,
ExtVectorTypeLoc TL) {
const VectorType *T = TL.getTypePtr();
QualType ElementType = getDerived().TransformType(TLB, TL.getElementLoc());
if (ElementType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ElementType != T->getElementType()) {
Result = getDerived().RebuildExtVectorType(ElementType,
T->getNumElements(),
/*FIXME*/ SourceLocation());
if (Result.isNull())
return QualType();
}
ExtVectorTypeLoc NewTL = TLB.push<ExtVectorTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template <typename Derived>
ParmVarDecl *TreeTransform<Derived>::TransformFunctionTypeParam(
ParmVarDecl *OldParm, int indexAdjustment, Optional<unsigned> NumExpansions,
bool ExpectParameterPack) {
TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo();
TypeSourceInfo *NewDI = nullptr;
if (NumExpansions && isa<PackExpansionType>(OldDI->getType())) {
// If we're substituting into a pack expansion type and we know the
// length we want to expand to, just substitute for the pattern.
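// Illustrative example (not from this file): for a parameter written as
// "Ts ...ts" the pattern is "Ts"; only that pattern is transformed below,
// and the result is wrapped back into a pack expansion of the known length.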
TypeLoc OldTL = OldDI->getTypeLoc();
PackExpansionTypeLoc OldExpansionTL = OldTL.castAs<PackExpansionTypeLoc>();
TypeLocBuilder TLB;
TypeLoc NewTL = OldDI->getTypeLoc();
TLB.reserve(NewTL.getFullDataSize());
QualType Result = getDerived().TransformType(TLB,
OldExpansionTL.getPatternLoc());
if (Result.isNull())
return nullptr;
Result = RebuildPackExpansionType(Result,
OldExpansionTL.getPatternLoc().getSourceRange(),
OldExpansionTL.getEllipsisLoc(),
NumExpansions);
if (Result.isNull())
return nullptr;
PackExpansionTypeLoc NewExpansionTL
= TLB.push<PackExpansionTypeLoc>(Result);
NewExpansionTL.setEllipsisLoc(OldExpansionTL.getEllipsisLoc());
NewDI = TLB.getTypeSourceInfo(SemaRef.Context, Result);
} else
NewDI = getDerived().TransformType(OldDI);
if (!NewDI)
return nullptr;
if (NewDI == OldDI && indexAdjustment == 0)
return OldParm;
ParmVarDecl *newParm = ParmVarDecl::Create(SemaRef.Context,
OldParm->getDeclContext(),
OldParm->getInnerLocStart(),
OldParm->getLocation(),
OldParm->getIdentifier(),
NewDI->getType(),
NewDI,
OldParm->getStorageClass(),
/* DefArg */ nullptr);
newParm->setScopeInfo(OldParm->getFunctionScopeDepth(),
OldParm->getFunctionScopeIndex() + indexAdjustment);
transformedLocalDecl(OldParm, {newParm});
return newParm;
}
template <typename Derived>
bool TreeTransform<Derived>::TransformFunctionTypeParams(
SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
const QualType *ParamTypes,
const FunctionProtoType::ExtParameterInfo *ParamInfos,
SmallVectorImpl<QualType> &OutParamTypes,
SmallVectorImpl<ParmVarDecl *> *PVars,
Sema::ExtParameterInfoBuilder &PInfos) {
int indexAdjustment = 0;
unsigned NumParams = Params.size();
for (unsigned i = 0; i != NumParams; ++i) {
if (ParmVarDecl *OldParm = Params[i]) {
assert(OldParm->getFunctionScopeIndex() == i);
Optional<unsigned> NumExpansions;
ParmVarDecl *NewParm = nullptr;
if (OldParm->isParameterPack()) {
// We have a function parameter pack that may need to be expanded.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
// Find the parameter packs that could be expanded.
TypeLoc TL = OldParm->getTypeSourceInfo()->getTypeLoc();
PackExpansionTypeLoc ExpansionTL = TL.castAs<PackExpansionTypeLoc>();
TypeLoc Pattern = ExpansionTL.getPatternLoc();
SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether we should expand the parameter packs.
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions;
if (Unexpanded.size() > 0) {
OrigNumExpansions = ExpansionTL.getTypePtr()->getNumExpansions();
NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(ExpansionTL.getEllipsisLoc(),
Pattern.getSourceRange(),
Unexpanded,
ShouldExpand,
RetainExpansion,
NumExpansions)) {
return true;
}
} else {
#ifndef NDEBUG
const AutoType *AT =
Pattern.getType().getTypePtr()->getContainedAutoType();
assert((AT && (!AT->isDeduced() || AT->getDeducedType().isNull())) &&
"Could not find parameter packs or undeduced auto type!");
#endif
}
if (ShouldExpand) {
// Expand the function parameter pack into multiple, separate
// parameters.
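// Illustrative example (not from this file): for
//   template <typename ...Ts> void f(Ts ...ts);
// instantiated with Ts = {int, float}, the single pack parameter 'ts'
// becomes two separate parameters of type int and float.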
getDerived().ExpandingFunctionParameterPack(OldParm);
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ParmVarDecl *NewParm
= getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment++,
OrigNumExpansions,
/*ExpectParameterPack=*/false);
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ParmVarDecl *NewParm
= getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment++,
OrigNumExpansions,
/*ExpectParameterPack=*/false);
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
}
// The next parameter should have the same adjustment as the
// last thing we pushed, but we post-incremented indexAdjustment
// on every push. Also, if we push nothing, the adjustment should
// go down by one.
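// Illustrative example (not from this file): starting from 0, a pack that
// expanded into three parameters used adjustments 0, 1 and 2 and left
// indexAdjustment at 3; the decrement below makes the next parameter reuse 2.
// If the pack expanded into nothing, the decrement yields -1, shifting later
// parameters down by one slot.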
indexAdjustment--;
// We're done with the pack expansion.
continue;
}
// We'll substitute the parameter now without expanding the pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
NewParm = getDerived().TransformFunctionTypeParam(OldParm,
indexAdjustment,
NumExpansions,
/*ExpectParameterPack=*/true);
assert(NewParm->isParameterPack() &&
"Parameter pack no longer a parameter pack after "
"transformation.");
} else {
NewParm = getDerived().TransformFunctionTypeParam(
OldParm, indexAdjustment, None, /*ExpectParameterPack=*/ false);
}
if (!NewParm)
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
continue;
}
// Deal with the possibility that we don't have a parameter
// declaration for this parameter.
QualType OldType = ParamTypes[i];
bool IsPackExpansion = false;
Optional<unsigned> NumExpansions;
QualType NewType;
if (const PackExpansionType *Expansion
= dyn_cast<PackExpansionType>(OldType)) {
// We have a function parameter pack that may need to be expanded.
QualType Pattern = Expansion->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
// Determine whether we should expand the parameter packs.
bool ShouldExpand = false;
bool RetainExpansion = false;
if (getDerived().TryExpandParameterPacks(Loc, SourceRange(),
Unexpanded,
ShouldExpand,
RetainExpansion,
NumExpansions)) {
return true;
}
if (ShouldExpand) {
// Expand the function parameter pack into multiple, separate
// parameters.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
QualType NewType = getDerived().TransformType(Pattern);
if (NewType.isNull())
return true;
if (NewType->containsUnexpandedParameterPack()) {
NewType =
getSema().getASTContext().getPackExpansionType(NewType, None);
if (NewType.isNull())
return true;
}
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
// We're done with the pack expansion.
continue;
}
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
if (RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
QualType NewType = getDerived().TransformType(Pattern);
if (NewType.isNull())
return true;
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
// We'll substitute the parameter now without expanding the pack
// expansion.
OldType = Expansion->getPattern();
IsPackExpansion = true;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
NewType = getDerived().TransformType(OldType);
} else {
NewType = getDerived().TransformType(OldType);
}
if (NewType.isNull())
return true;
if (IsPackExpansion)
NewType = getSema().Context.getPackExpansionType(NewType,
NumExpansions);
if (ParamInfos)
PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
}
#ifndef NDEBUG
if (PVars) {
for (unsigned i = 0, e = PVars->size(); i != e; ++i)
if (ParmVarDecl *parm = (*PVars)[i])
assert(parm->getFunctionScopeIndex() == i);
}
#endif
return false;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformFunctionProtoType(TypeLocBuilder &TLB,
FunctionProtoTypeLoc TL) {
SmallVector<QualType, 4> ExceptionStorage;
TreeTransform *This = this; // Work around gcc.gnu.org/PR56135.
return getDerived().TransformFunctionProtoType(
TLB, TL, nullptr, Qualifiers(),
[&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
return This->getDerived().TransformExceptionSpec(
TL.getBeginLoc(), ESI, ExceptionStorage, Changed);
});
}
template<typename Derived> template<typename Fn>
QualType TreeTransform<Derived>::TransformFunctionProtoType(
TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, CXXRecordDecl *ThisContext,
Qualifiers ThisTypeQuals, Fn TransformExceptionSpec) {
// Transform the parameters and return type.
//
// We are required to instantiate the params and return type in source order.
// When the function has a trailing return type, we instantiate the
// parameters before the return type, since the return type can then refer
// to the parameters themselves (via decltype, sizeof, etc.).
//
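// Illustrative example (not from this file): given
//   template <typename T> auto f(T a, T b) -> decltype(a + b);
// the trailing return type names the parameters, so the parameters must be
// transformed before the return type.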
SmallVector<QualType, 4> ParamTypes;
SmallVector<ParmVarDecl*, 4> ParamDecls;
Sema::ExtParameterInfoBuilder ExtParamInfos;
const FunctionProtoType *T = TL.getTypePtr();
QualType ResultType;
if (T->hasTrailingReturn()) {
if (getDerived().TransformFunctionTypeParams(
TL.getBeginLoc(), TL.getParams(),
TL.getTypePtr()->param_type_begin(),
T->getExtParameterInfosOrNull(),
ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
{
// C++11 [expr.prim.general]p3:
// If a declaration declares a member function or member function
// template of a class X, the expression this is a prvalue of type
// "pointer to cv-qualifier-seq X" between the optional cv-qualifer-seq
// and the end of the function-definition, member-declarator, or
// declarator.
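// Illustrative example (not from this file): in
//   auto S::get() const -> decltype(this->value);
// 'this' may appear in the trailing return type, which is why a
// CXXThisScopeRAII is set up while the return type is transformed.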
Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, ThisTypeQuals);
ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
}
}
else {
ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
if (getDerived().TransformFunctionTypeParams(
TL.getBeginLoc(), TL.getParams(),
TL.getTypePtr()->param_type_begin(),
T->getExtParameterInfosOrNull(),
ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
}
FunctionProtoType::ExtProtoInfo EPI = T->getExtProtoInfo();
bool EPIChanged = false;
if (TransformExceptionSpec(EPI.ExceptionSpec, EPIChanged))
return QualType();
// Handle extended parameter information.
if (auto NewExtParamInfos =
ExtParamInfos.getPointerOrNull(ParamTypes.size())) {
if (!EPI.ExtParameterInfos ||
llvm::makeArrayRef(EPI.ExtParameterInfos, TL.getNumParams())
!= llvm::makeArrayRef(NewExtParamInfos, ParamTypes.size())) {
EPIChanged = true;
}
EPI.ExtParameterInfos = NewExtParamInfos;
} else if (EPI.ExtParameterInfos) {
EPIChanged = true;
EPI.ExtParameterInfos = nullptr;
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ResultType != T->getReturnType() ||
T->getParamTypes() != llvm::makeArrayRef(ParamTypes) || EPIChanged) {
Result = getDerived().RebuildFunctionProtoType(ResultType, ParamTypes, EPI);
if (Result.isNull())
return QualType();
}
FunctionProtoTypeLoc NewTL = TLB.push<FunctionProtoTypeLoc>(Result);
NewTL.setLocalRangeBegin(TL.getLocalRangeBegin());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setExceptionSpecRange(TL.getExceptionSpecRange());
NewTL.setLocalRangeEnd(TL.getLocalRangeEnd());
for (unsigned i = 0, e = NewTL.getNumParams(); i != e; ++i)
NewTL.setParam(i, ParamDecls[i]);
return Result;
}
template<typename Derived>
bool TreeTransform<Derived>::TransformExceptionSpec(
SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI,
SmallVectorImpl<QualType> &Exceptions, bool &Changed) {
assert(ESI.Type != EST_Uninstantiated && ESI.Type != EST_Unevaluated);
// Instantiate a dynamic noexcept expression, if any.
if (isComputedNoexcept(ESI.Type)) {
EnterExpressionEvaluationContext Unevaluated(
getSema(), Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult NoexceptExpr = getDerived().TransformExpr(ESI.NoexceptExpr);
if (NoexceptExpr.isInvalid())
return true;
ExceptionSpecificationType EST = ESI.Type;
NoexceptExpr =
getSema().ActOnNoexceptSpec(Loc, NoexceptExpr.get(), EST);
if (NoexceptExpr.isInvalid())
return true;
if (ESI.NoexceptExpr != NoexceptExpr.get() || EST != ESI.Type)
Changed = true;
ESI.NoexceptExpr = NoexceptExpr.get();
ESI.Type = EST;
}
if (ESI.Type != EST_Dynamic)
return false;
// Instantiate a dynamic exception specification's type.
for (QualType T : ESI.Exceptions) {
if (const PackExpansionType *PackExpansion =
T->getAs<PackExpansionType>()) {
Changed = true;
// We have a pack expansion. Instantiate it.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PackExpansion->getPattern(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = PackExpansion->getNumExpansions();
// FIXME: Track the location of the ellipsis (and track source location
// information for the types in the exception specification in general).
if (getDerived().TryExpandParameterPacks(
Loc, SourceRange(), Unexpanded, Expand,
RetainExpansion, NumExpansions))
return true;
if (!Expand) {
// We can't expand this pack expansion into separate arguments yet;
// just substitute into the pattern and create a new pack expansion
// type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
QualType U = getDerived().TransformType(PackExpansion->getPattern());
if (U.isNull())
return true;
U = SemaRef.Context.getPackExpansionType(U, NumExpansions);
Exceptions.push_back(U);
continue;
}
// Substitute into the pack expansion pattern for each slice of the
// pack.
for (unsigned ArgIdx = 0; ArgIdx != *NumExpansions; ++ArgIdx) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), ArgIdx);
QualType U = getDerived().TransformType(PackExpansion->getPattern());
if (U.isNull() || SemaRef.CheckSpecifiedExceptionType(U, Loc))
return true;
Exceptions.push_back(U);
}
} else {
QualType U = getDerived().TransformType(T);
if (U.isNull() || SemaRef.CheckSpecifiedExceptionType(U, Loc))
return true;
if (T != U)
Changed = true;
Exceptions.push_back(U);
}
}
ESI.Exceptions = Exceptions;
if (ESI.Exceptions.empty())
ESI.Type = EST_DynamicNone;
return false;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformFunctionNoProtoType(
TypeLocBuilder &TLB,
FunctionNoProtoTypeLoc TL) {
const FunctionNoProtoType *T = TL.getTypePtr();
QualType ResultType = getDerived().TransformType(TLB, TL.getReturnLoc());
if (ResultType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ResultType != T->getReturnType())
Result = getDerived().RebuildFunctionNoProtoType(ResultType);
FunctionNoProtoTypeLoc NewTL = TLB.push<FunctionNoProtoTypeLoc>(Result);
NewTL.setLocalRangeBegin(TL.getLocalRangeBegin());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setLocalRangeEnd(TL.getLocalRangeEnd());
return Result;
}
template<typename Derived> QualType
TreeTransform<Derived>::TransformUnresolvedUsingType(TypeLocBuilder &TLB,
UnresolvedUsingTypeLoc TL) {
const UnresolvedUsingType *T = TL.getTypePtr();
Decl *D = getDerived().TransformDecl(TL.getNameLoc(), T->getDecl());
if (!D)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || D != T->getDecl()) {
Result = getDerived().RebuildUnresolvedUsingType(TL.getNameLoc(), D);
if (Result.isNull())
return QualType();
}
// We might get an arbitrary type spec type back. We should at
// least always get a type spec type, though.
TypeSpecTypeLoc NewTL = TLB.pushTypeSpec(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypedefType(TypeLocBuilder &TLB,
TypedefTypeLoc TL) {
const TypedefType *T = TL.getTypePtr();
TypedefNameDecl *Typedef
= cast_or_null<TypedefNameDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Typedef)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Typedef != T->getDecl()) {
Result = getDerived().RebuildTypedefType(Typedef);
if (Result.isNull())
return QualType();
}
TypedefTypeLoc NewTL = TLB.push<TypedefTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeOfExprType(TypeLocBuilder &TLB,
TypeOfExprTypeLoc TL) {
// typeof expressions are not potentially evaluated contexts
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated,
Sema::ReuseLambdaContextDecl);
ExprResult E = getDerived().TransformExpr(TL.getUnderlyingExpr());
if (E.isInvalid())
return QualType();
E = SemaRef.HandleExprEvaluationContextForTypeof(E.get());
if (E.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
E.get() != TL.getUnderlyingExpr()) {
Result = getDerived().RebuildTypeOfExprType(E.get(), TL.getTypeofLoc());
if (Result.isNull())
return QualType();
}
else E.get();
TypeOfExprTypeLoc NewTL = TLB.push<TypeOfExprTypeLoc>(Result);
NewTL.setTypeofLoc(TL.getTypeofLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTypeOfType(TypeLocBuilder &TLB,
TypeOfTypeLoc TL) {
TypeSourceInfo* Old_Under_TI = TL.getUnderlyingTInfo();
TypeSourceInfo* New_Under_TI = getDerived().TransformType(Old_Under_TI);
if (!New_Under_TI)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || New_Under_TI != Old_Under_TI) {
Result = getDerived().RebuildTypeOfType(New_Under_TI->getType());
if (Result.isNull())
return QualType();
}
TypeOfTypeLoc NewTL = TLB.push<TypeOfTypeLoc>(Result);
NewTL.setTypeofLoc(TL.getTypeofLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
NewTL.setUnderlyingTInfo(New_Under_TI);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDecltypeType(TypeLocBuilder &TLB,
DecltypeTypeLoc TL) {
const DecltypeType *T = TL.getTypePtr();
// decltype expressions are not potentially evaluated contexts
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated, nullptr,
Sema::ExpressionEvaluationContextRecord::EK_Decltype);
ExprResult E = getDerived().TransformExpr(T->getUnderlyingExpr());
if (E.isInvalid())
return QualType();
E = getSema().ActOnDecltypeExpression(E.get());
if (E.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
E.get() != T->getUnderlyingExpr()) {
Result = getDerived().RebuildDecltypeType(E.get(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
else E.get();
DecltypeTypeLoc NewTL = TLB.push<DecltypeTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformUnaryTransformType(
TypeLocBuilder &TLB,
UnaryTransformTypeLoc TL) {
QualType Result = TL.getType();
if (Result->isDependentType()) {
const UnaryTransformType *T = TL.getTypePtr();
QualType NewBase =
getDerived().TransformType(TL.getUnderlyingTInfo())->getType();
Result = getDerived().RebuildUnaryTransformType(NewBase,
T->getUTTKind(),
TL.getKWLoc());
if (Result.isNull())
return QualType();
}
UnaryTransformTypeLoc NewTL = TLB.push<UnaryTransformTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
NewTL.setParensRange(TL.getParensRange());
NewTL.setUnderlyingTInfo(TL.getUnderlyingTInfo());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDeducedTemplateSpecializationType(
TypeLocBuilder &TLB, DeducedTemplateSpecializationTypeLoc TL) {
const DeducedTemplateSpecializationType *T = TL.getTypePtr();
CXXScopeSpec SS;
TemplateName TemplateName = getDerived().TransformTemplateName(
SS, T->getTemplateName(), TL.getTemplateNameLoc());
if (TemplateName.isNull())
return QualType();
QualType OldDeduced = T->getDeducedType();
QualType NewDeduced;
if (!OldDeduced.isNull()) {
NewDeduced = getDerived().TransformType(OldDeduced);
if (NewDeduced.isNull())
return QualType();
}
QualType Result = getDerived().RebuildDeducedTemplateSpecializationType(
TemplateName, NewDeduced);
if (Result.isNull())
return QualType();
DeducedTemplateSpecializationTypeLoc NewTL =
TLB.push<DeducedTemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformRecordType(TypeLocBuilder &TLB,
RecordTypeLoc TL) {
const RecordType *T = TL.getTypePtr();
RecordDecl *Record
= cast_or_null<RecordDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Record)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Record != T->getDecl()) {
Result = getDerived().RebuildRecordType(Record);
if (Result.isNull())
return QualType();
}
RecordTypeLoc NewTL = TLB.push<RecordTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformEnumType(TypeLocBuilder &TLB,
EnumTypeLoc TL) {
const EnumType *T = TL.getTypePtr();
EnumDecl *Enum
= cast_or_null<EnumDecl>(getDerived().TransformDecl(TL.getNameLoc(),
T->getDecl()));
if (!Enum)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Enum != T->getDecl()) {
Result = getDerived().RebuildEnumType(Enum);
if (Result.isNull())
return QualType();
}
EnumTypeLoc NewTL = TLB.push<EnumTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformInjectedClassNameType(
TypeLocBuilder &TLB,
InjectedClassNameTypeLoc TL) {
Decl *D = getDerived().TransformDecl(TL.getNameLoc(),
TL.getTypePtr()->getDecl());
if (!D) return QualType();
QualType T = SemaRef.Context.getTypeDeclType(cast<TypeDecl>(D));
TLB.pushTypeSpec(T).setNameLoc(TL.getNameLoc());
return T;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTemplateTypeParmType(
TypeLocBuilder &TLB,
TemplateTypeParmTypeLoc TL) {
return TransformTypeSpecType(TLB, TL);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformSubstTemplateTypeParmType(
TypeLocBuilder &TLB,
SubstTemplateTypeParmTypeLoc TL) {
const SubstTemplateTypeParmType *T = TL.getTypePtr();
// Substitute into the replacement type, which itself might involve something
// that needs to be transformed. This only tends to occur with default
// template arguments of template template parameters.
TemporaryBase Rebase(*this, TL.getNameLoc(), DeclarationName());
QualType Replacement = getDerived().TransformType(T->getReplacementType());
if (Replacement.isNull())
return QualType();
// Always canonicalize the replacement type.
Replacement = SemaRef.Context.getCanonicalType(Replacement);
QualType Result
= SemaRef.Context.getSubstTemplateTypeParmType(T->getReplacedParameter(),
Replacement);
// Propagate type-source information.
SubstTemplateTypeParmTypeLoc NewTL
= TLB.push<SubstTemplateTypeParmTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformSubstTemplateTypeParmPackType(
TypeLocBuilder &TLB,
SubstTemplateTypeParmPackTypeLoc TL) {
return TransformTypeSpecType(TLB, TL);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL) {
const TemplateSpecializationType *T = TL.getTypePtr();
// The nested-name-specifier never matters in a TemplateSpecializationType,
// because we can't have a dependent nested-name-specifier anyway.
CXXScopeSpec SS;
TemplateName Template
= getDerived().TransformTemplateName(SS, T->getTemplateName(),
TL.getTemplateNameLoc());
if (Template.isNull())
return QualType();
return getDerived().TransformTemplateSpecializationType(TLB, TL, Template);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformAtomicType(TypeLocBuilder &TLB,
AtomicTypeLoc TL) {
QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc());
if (ValueType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
ValueType != TL.getValueLoc().getType()) {
Result = getDerived().RebuildAtomicType(ValueType, TL.getKWLoc());
if (Result.isNull())
return QualType();
}
AtomicTypeLoc NewTL = TLB.push<AtomicTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformPipeType(TypeLocBuilder &TLB,
PipeTypeLoc TL) {
QualType ValueType = getDerived().TransformType(TLB, TL.getValueLoc());
if (ValueType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ValueType != TL.getValueLoc().getType()) {
const PipeType *PT = Result->castAs<PipeType>();
bool isReadPipe = PT->isReadOnly();
Result = getDerived().RebuildPipeType(ValueType, TL.getKWLoc(), isReadPipe);
if (Result.isNull())
return QualType();
}
PipeTypeLoc NewTL = TLB.push<PipeTypeLoc>(Result);
NewTL.setKWLoc(TL.getKWLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformExtIntType(TypeLocBuilder &TLB,
ExtIntTypeLoc TL) {
const ExtIntType *EIT = TL.getTypePtr();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild()) {
Result = getDerived().RebuildExtIntType(EIT->isUnsigned(),
EIT->getNumBits(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentExtIntType(
TypeLocBuilder &TLB, DependentExtIntTypeLoc TL) {
const DependentExtIntType *EIT = TL.getTypePtr();
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult BitsExpr = getDerived().TransformExpr(EIT->getNumBitsExpr());
BitsExpr = SemaRef.ActOnConstantExpression(BitsExpr);
if (BitsExpr.isInvalid())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || BitsExpr.get() != EIT->getNumBitsExpr()) {
Result = getDerived().RebuildDependentExtIntType(
EIT->isUnsigned(), BitsExpr.get(), TL.getNameLoc());
if (Result.isNull())
return QualType();
}
if (isa<DependentExtIntType>(Result)) {
DependentExtIntTypeLoc NewTL = TLB.push<DependentExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
} else {
ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
/// Simple iterator that traverses the template arguments in a
/// container that provides a \c getArgLoc() member function.
///
/// This iterator is intended to be used with the iterator form of
/// \c TreeTransform<Derived>::TransformTemplateArguments().
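///
/// Typical usage (illustrative, mirroring the calls later in this file):
/// \code
///   typedef TemplateArgumentLocContainerIterator<AutoTypeLoc> ArgIterator;
///   getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
///                                           ArgIterator(TL, TL.getNumArgs()),
///                                           NewTemplateArgs);
/// \endcode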
template<typename ArgLocContainer>
class TemplateArgumentLocContainerIterator {
ArgLocContainer *Container;
unsigned Index;
public:
typedef TemplateArgumentLoc value_type;
typedef TemplateArgumentLoc reference;
typedef int difference_type;
typedef std::input_iterator_tag iterator_category;
class pointer {
TemplateArgumentLoc Arg;
public:
explicit pointer(TemplateArgumentLoc Arg) : Arg(Arg) { }
const TemplateArgumentLoc *operator->() const {
return &Arg;
}
};
TemplateArgumentLocContainerIterator() {}
TemplateArgumentLocContainerIterator(ArgLocContainer &Container,
unsigned Index)
: Container(&Container), Index(Index) { }
TemplateArgumentLocContainerIterator &operator++() {
++Index;
return *this;
}
TemplateArgumentLocContainerIterator operator++(int) {
TemplateArgumentLocContainerIterator Old(*this);
++(*this);
return Old;
}
TemplateArgumentLoc operator*() const {
return Container->getArgLoc(Index);
}
pointer operator->() const {
return pointer(Container->getArgLoc(Index));
}
friend bool operator==(const TemplateArgumentLocContainerIterator &X,
const TemplateArgumentLocContainerIterator &Y) {
return X.Container == Y.Container && X.Index == Y.Index;
}
friend bool operator!=(const TemplateArgumentLocContainerIterator &X,
const TemplateArgumentLocContainerIterator &Y) {
return !(X == Y);
}
};
template<typename Derived>
QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
AutoTypeLoc TL) {
const AutoType *T = TL.getTypePtr();
QualType OldDeduced = T->getDeducedType();
QualType NewDeduced;
if (!OldDeduced.isNull()) {
NewDeduced = getDerived().TransformType(OldDeduced);
if (NewDeduced.isNull())
return QualType();
}
ConceptDecl *NewCD = nullptr;
TemplateArgumentListInfo NewTemplateArgs;
NestedNameSpecifierLoc NewNestedNameSpec;
if (T->isConstrained()) {
NewCD = cast_or_null<ConceptDecl>(getDerived().TransformDecl(
TL.getConceptNameLoc(), T->getTypeConstraintConcept()));
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<AutoTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL,
TL.getNumArgs()),
NewTemplateArgs))
return QualType();
if (TL.getNestedNameSpecifierLoc()) {
NewNestedNameSpec
= getDerived().TransformNestedNameSpecifierLoc(
TL.getNestedNameSpecifierLoc());
if (!NewNestedNameSpec)
return QualType();
}
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || NewDeduced != OldDeduced ||
T->isDependentType() || T->isConstrained()) {
// FIXME: Maybe don't rebuild if all template arguments are the same.
llvm::SmallVector<TemplateArgument, 4> NewArgList;
NewArgList.reserve(NewTemplateArgs.size());
for (const auto &ArgLoc : NewTemplateArgs.arguments())
NewArgList.push_back(ArgLoc.getArgument());
Result = getDerived().RebuildAutoType(NewDeduced, T->getKeyword(), NewCD,
NewArgList);
if (Result.isNull())
return QualType();
}
AutoTypeLoc NewTL = TLB.push<AutoTypeLoc>(Result);
NewTL.setNameLoc(TL.getNameLoc());
NewTL.setNestedNameSpecifierLoc(NewNestedNameSpec);
NewTL.setTemplateKWLoc(TL.getTemplateKWLoc());
NewTL.setConceptNameLoc(TL.getConceptNameLoc());
NewTL.setFoundDecl(TL.getFoundDecl());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
- for (unsigned I = 0; I < TL.getNumArgs(); ++I)
+ for (unsigned I = 0; I < NewTL.getNumArgs(); ++I)
NewTL.setArgLocInfo(I, NewTemplateArgs.arguments()[I].getLocInfo());
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformTemplateSpecializationType(
TypeLocBuilder &TLB,
TemplateSpecializationTypeLoc TL,
TemplateName Template) {
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<TemplateSpecializationTypeLoc>
ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
// FIXME: maybe don't rebuild if all the template arguments are the same.
QualType Result =
getDerived().RebuildTemplateSpecializationType(Template,
TL.getTemplateNameLoc(),
NewTemplateArgs);
if (!Result.isNull()) {
// Specializations of template template parameters are represented as
// TemplateSpecializationTypes, and substitution of type alias templates
// within a dependent context can transform them into
// DependentTemplateSpecializationTypes.
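// Illustrative example (hypothetical, not from this file): a use of
//   template <typename T> using A = typename T::template B<int>;
// such as A<U> with U still dependent rebuilds into the
// DependentTemplateSpecializationType 'typename U::template B<int>'.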
if (isa<DependentTemplateSpecializationType>(Result)) {
DependentTemplateSpecializationTypeLoc NewTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(SourceLocation());
NewTL.setQualifierLoc(NestedNameSpecifierLoc());
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
return Result;
}
TemplateSpecializationTypeLoc NewTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
}
return Result;
}
template <typename Derived>
QualType TreeTransform<Derived>::TransformDependentTemplateSpecializationType(
TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
TemplateName Template,
CXXScopeSpec &SS) {
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<
DependentTemplateSpecializationTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
// FIXME: maybe don't rebuild if all the template arguments are the same.
if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) {
QualType Result
= getSema().Context.getDependentTemplateSpecializationType(
TL.getTypePtr()->getKeyword(),
DTN->getQualifier(),
DTN->getIdentifier(),
NewTemplateArgs);
DependentTemplateSpecializationTypeLoc NewTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(SS.getWithLocInContext(SemaRef.Context));
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
return Result;
}
QualType Result
= getDerived().RebuildTemplateSpecializationType(Template,
TL.getTemplateNameLoc(),
NewTemplateArgs);
if (!Result.isNull()) {
/// FIXME: Wrap this in an elaborated-type-specifier?
TemplateSpecializationTypeLoc NewTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
NewTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NewTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned i = 0, e = NewTemplateArgs.size(); i != e; ++i)
NewTL.setArgLocInfo(i, NewTemplateArgs[i].getLocInfo());
}
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformElaboratedType(TypeLocBuilder &TLB,
ElaboratedTypeLoc TL) {
const ElaboratedType *T = TL.getTypePtr();
NestedNameSpecifierLoc QualifierLoc;
// NOTE: the qualifier in an ElaboratedType is optional.
if (TL.getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
}
QualType NamedT = getDerived().TransformType(TLB, TL.getNamedTypeLoc());
if (NamedT.isNull())
return QualType();
// C++0x [dcl.type.elab]p2:
// If the identifier resolves to a typedef-name or the simple-template-id
// resolves to an alias template specialization, the
// elaborated-type-specifier is ill-formed.
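// Illustrative example (hypothetical, not from this file):
//   template <typename T> using Alias = SomeClass<T>;
//   struct Alias<int> x;   // ill-formed: Alias<int> names an alias template
//                          // specialization, not a class.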
if (T->getKeyword() != ETK_None && T->getKeyword() != ETK_Typename) {
if (const TemplateSpecializationType *TST =
NamedT->getAs<TemplateSpecializationType>()) {
TemplateName Template = TST->getTemplateName();
if (TypeAliasTemplateDecl *TAT = dyn_cast_or_null<TypeAliasTemplateDecl>(
Template.getAsTemplateDecl())) {
SemaRef.Diag(TL.getNamedTypeLoc().getBeginLoc(),
diag::err_tag_reference_non_tag)
<< TAT << Sema::NTK_TypeAliasTemplate
<< ElaboratedType::getTagTypeKindForKeyword(T->getKeyword());
SemaRef.Diag(TAT->getLocation(), diag::note_declared_at);
}
}
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
QualifierLoc != TL.getQualifierLoc() ||
NamedT != T->getNamedType()) {
Result = getDerived().RebuildElaboratedType(TL.getElaboratedKeywordLoc(),
T->getKeyword(),
QualifierLoc, NamedT);
if (Result.isNull())
return QualType();
}
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformAttributedType(
TypeLocBuilder &TLB,
AttributedTypeLoc TL) {
const AttributedType *oldType = TL.getTypePtr();
QualType modifiedType = getDerived().TransformType(TLB, TL.getModifiedLoc());
if (modifiedType.isNull())
return QualType();
// oldAttr can be null if we started with a QualType rather than a TypeLoc.
const Attr *oldAttr = TL.getAttr();
const Attr *newAttr = oldAttr ? getDerived().TransformAttr(oldAttr) : nullptr;
if (oldAttr && !newAttr)
return QualType();
QualType result = TL.getType();
// FIXME: dependent operand expressions?
if (getDerived().AlwaysRebuild() ||
modifiedType != oldType->getModifiedType()) {
// TODO: this is really lame; we should really be rebuilding the
// equivalent type from first principles.
QualType equivalentType
= getDerived().TransformType(oldType->getEquivalentType());
if (equivalentType.isNull())
return QualType();
// Check whether we can add nullability; it is only represented as
// type sugar, and therefore cannot be diagnosed in any other way.
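// Illustrative example (hypothetical, not from this file): if "T _Nonnull"
// is instantiated with T = int, the modified type can no longer carry
// nullability, so the error has to be diagnosed here.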
if (auto nullability = oldType->getImmediateNullability()) {
if (!modifiedType->canHaveNullability()) {
SemaRef.Diag(TL.getAttr()->getLocation(),
diag::err_nullability_nonpointer)
<< DiagNullabilityKind(*nullability, false) << modifiedType;
return QualType();
}
}
result = SemaRef.Context.getAttributedType(TL.getAttrKind(),
modifiedType,
equivalentType);
}
AttributedTypeLoc newTL = TLB.push<AttributedTypeLoc>(result);
newTL.setAttr(newAttr);
return result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformParenType(TypeLocBuilder &TLB,
ParenTypeLoc TL) {
QualType Inner = getDerived().TransformType(TLB, TL.getInnerLoc());
if (Inner.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Inner != TL.getInnerLoc().getType()) {
Result = getDerived().RebuildParenType(Inner);
if (Result.isNull())
return QualType();
}
ParenTypeLoc NewTL = TLB.push<ParenTypeLoc>(Result);
NewTL.setLParenLoc(TL.getLParenLoc());
NewTL.setRParenLoc(TL.getRParenLoc());
return Result;
}
template <typename Derived>
QualType
TreeTransform<Derived>::TransformMacroQualifiedType(TypeLocBuilder &TLB,
MacroQualifiedTypeLoc TL) {
QualType Inner = getDerived().TransformType(TLB, TL.getInnerLoc());
if (Inner.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || Inner != TL.getInnerLoc().getType()) {
Result =
getDerived().RebuildMacroQualifiedType(Inner, TL.getMacroIdentifier());
if (Result.isNull())
return QualType();
}
MacroQualifiedTypeLoc NewTL = TLB.push<MacroQualifiedTypeLoc>(Result);
NewTL.setExpansionLoc(TL.getExpansionLoc());
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentNameType(
TypeLocBuilder &TLB, DependentNameTypeLoc TL) {
return TransformDependentNameType(TLB, TL, false);
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformDependentNameType(
TypeLocBuilder &TLB, DependentNameTypeLoc TL, bool DeducedTSTContext) {
const DependentNameType *T = TL.getTypePtr();
NestedNameSpecifierLoc QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
QualType Result
= getDerived().RebuildDependentNameType(T->getKeyword(),
TL.getElaboratedKeywordLoc(),
QualifierLoc,
T->getIdentifier(),
TL.getNameLoc(),
DeducedTSTContext);
if (Result.isNull())
return QualType();
if (const ElaboratedType* ElabT = Result->getAs<ElaboratedType>()) {
QualType NamedT = ElabT->getNamedType();
TLB.pushTypeSpec(NamedT).setNameLoc(TL.getNameLoc());
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
} else {
DependentNameTypeLoc NewTL = TLB.push<DependentNameTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
NewTL.setNameLoc(TL.getNameLoc());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL) {
NestedNameSpecifierLoc QualifierLoc;
if (TL.getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(TL.getQualifierLoc());
if (!QualifierLoc)
return QualType();
}
return getDerived()
.TransformDependentTemplateSpecializationType(TLB, TL, QualifierLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::
TransformDependentTemplateSpecializationType(TypeLocBuilder &TLB,
DependentTemplateSpecializationTypeLoc TL,
NestedNameSpecifierLoc QualifierLoc) {
const DependentTemplateSpecializationType *T = TL.getTypePtr();
TemplateArgumentListInfo NewTemplateArgs;
NewTemplateArgs.setLAngleLoc(TL.getLAngleLoc());
NewTemplateArgs.setRAngleLoc(TL.getRAngleLoc());
typedef TemplateArgumentLocContainerIterator<
DependentTemplateSpecializationTypeLoc> ArgIterator;
if (getDerived().TransformTemplateArguments(ArgIterator(TL, 0),
ArgIterator(TL, TL.getNumArgs()),
NewTemplateArgs))
return QualType();
QualType Result = getDerived().RebuildDependentTemplateSpecializationType(
T->getKeyword(), QualifierLoc, TL.getTemplateKeywordLoc(),
T->getIdentifier(), TL.getTemplateNameLoc(), NewTemplateArgs,
/*AllowInjectedClassName*/ false);
if (Result.isNull())
return QualType();
if (const ElaboratedType *ElabT = dyn_cast<ElaboratedType>(Result)) {
QualType NamedT = ElabT->getNamedType();
// Copy information relevant to the template specialization.
TemplateSpecializationTypeLoc NamedTL
= TLB.push<TemplateSpecializationTypeLoc>(NamedT);
NamedTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
NamedTL.setTemplateNameLoc(TL.getTemplateNameLoc());
NamedTL.setLAngleLoc(TL.getLAngleLoc());
NamedTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
NamedTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
// Copy information relevant to the elaborated type.
ElaboratedTypeLoc NewTL = TLB.push<ElaboratedTypeLoc>(Result);
NewTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
NewTL.setQualifierLoc(QualifierLoc);
} else if (isa<DependentTemplateSpecializationType>(Result)) {
DependentTemplateSpecializationTypeLoc SpecTL
= TLB.push<DependentTemplateSpecializationTypeLoc>(Result);
SpecTL.setElaboratedKeywordLoc(TL.getElaboratedKeywordLoc());
SpecTL.setQualifierLoc(QualifierLoc);
SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc());
SpecTL.setLAngleLoc(TL.getLAngleLoc());
SpecTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
SpecTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
} else {
TemplateSpecializationTypeLoc SpecTL
= TLB.push<TemplateSpecializationTypeLoc>(Result);
SpecTL.setTemplateKeywordLoc(TL.getTemplateKeywordLoc());
SpecTL.setTemplateNameLoc(TL.getTemplateNameLoc());
SpecTL.setLAngleLoc(TL.getLAngleLoc());
SpecTL.setRAngleLoc(TL.getRAngleLoc());
for (unsigned I = 0, E = NewTemplateArgs.size(); I != E; ++I)
SpecTL.setArgLocInfo(I, NewTemplateArgs[I].getLocInfo());
}
return Result;
}
template<typename Derived>
QualType TreeTransform<Derived>::TransformPackExpansionType(TypeLocBuilder &TLB,
PackExpansionTypeLoc TL) {
QualType Pattern
= getDerived().TransformType(TLB, TL.getPatternLoc());
if (Pattern.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
Pattern != TL.getPatternLoc().getType()) {
Result = getDerived().RebuildPackExpansionType(Pattern,
TL.getPatternLoc().getSourceRange(),
TL.getEllipsisLoc(),
TL.getTypePtr()->getNumExpansions());
if (Result.isNull())
return QualType();
}
PackExpansionTypeLoc NewT = TLB.push<PackExpansionTypeLoc>(Result);
NewT.setEllipsisLoc(TL.getEllipsisLoc());
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCInterfaceType(TypeLocBuilder &TLB,
ObjCInterfaceTypeLoc TL) {
// ObjCInterfaceType is never dependent.
TLB.pushFullCopy(TL);
return TL.getType();
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCTypeParamType(TypeLocBuilder &TLB,
ObjCTypeParamTypeLoc TL) {
const ObjCTypeParamType *T = TL.getTypePtr();
ObjCTypeParamDecl *OTP = cast_or_null<ObjCTypeParamDecl>(
getDerived().TransformDecl(T->getDecl()->getLocation(), T->getDecl()));
if (!OTP)
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
OTP != T->getDecl()) {
Result = getDerived().RebuildObjCTypeParamType(OTP,
TL.getProtocolLAngleLoc(),
llvm::makeArrayRef(TL.getTypePtr()->qual_begin(),
TL.getNumProtocols()),
TL.getProtocolLocs(),
TL.getProtocolRAngleLoc());
if (Result.isNull())
return QualType();
}
ObjCTypeParamTypeLoc NewTL = TLB.push<ObjCTypeParamTypeLoc>(Result);
if (TL.getNumProtocols()) {
NewTL.setProtocolLAngleLoc(TL.getProtocolLAngleLoc());
for (unsigned i = 0, n = TL.getNumProtocols(); i != n; ++i)
NewTL.setProtocolLoc(i, TL.getProtocolLoc(i));
NewTL.setProtocolRAngleLoc(TL.getProtocolRAngleLoc());
}
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCObjectType(TypeLocBuilder &TLB,
ObjCObjectTypeLoc TL) {
// Transform base type.
QualType BaseType = getDerived().TransformType(TLB, TL.getBaseLoc());
if (BaseType.isNull())
return QualType();
bool AnyChanged = BaseType != TL.getBaseLoc().getType();
// Transform type arguments.
SmallVector<TypeSourceInfo *, 4> NewTypeArgInfos;
for (unsigned i = 0, n = TL.getNumTypeArgs(); i != n; ++i) {
TypeSourceInfo *TypeArgInfo = TL.getTypeArgTInfo(i);
TypeLoc TypeArgLoc = TypeArgInfo->getTypeLoc();
QualType TypeArg = TypeArgInfo->getType();
if (auto PackExpansionLoc = TypeArgLoc.getAs<PackExpansionTypeLoc>()) {
AnyChanged = true;
// We have a pack expansion. Instantiate it.
const auto *PackExpansion = PackExpansionLoc.getType()
->castAs<PackExpansionType>();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PackExpansion->getPattern(),
Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can
// and should be expanded.
TypeLoc PatternLoc = PackExpansionLoc.getPatternLoc();
bool Expand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions = PackExpansion->getNumExpansions();
if (getDerived().TryExpandParameterPacks(
PackExpansionLoc.getEllipsisLoc(), PatternLoc.getSourceRange(),
Unexpanded, Expand, RetainExpansion, NumExpansions))
return QualType();
if (!Expand) {
// We can't expand this pack expansion into separate arguments yet;
// just substitute into the pattern and create a new pack expansion
// type.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(PatternLoc.getFullDataSize());
QualType NewPatternType = getDerived().TransformType(TypeArgBuilder,
PatternLoc);
if (NewPatternType.isNull())
return QualType();
QualType NewExpansionType = SemaRef.Context.getPackExpansionType(
NewPatternType, NumExpansions);
auto NewExpansionLoc = TLB.push<PackExpansionTypeLoc>(NewExpansionType);
NewExpansionLoc.setEllipsisLoc(PackExpansionLoc.getEllipsisLoc());
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewExpansionType));
continue;
}
// Substitute into the pack expansion pattern for each slice of the
// pack.
for (unsigned ArgIdx = 0; ArgIdx != *NumExpansions; ++ArgIdx) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), ArgIdx);
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(PatternLoc.getFullDataSize());
QualType NewTypeArg = getDerived().TransformType(TypeArgBuilder,
PatternLoc);
if (NewTypeArg.isNull())
return QualType();
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewTypeArg));
}
continue;
}
TypeLocBuilder TypeArgBuilder;
TypeArgBuilder.reserve(TypeArgLoc.getFullDataSize());
QualType NewTypeArg = getDerived().TransformType(TypeArgBuilder, TypeArgLoc);
if (NewTypeArg.isNull())
return QualType();
// If nothing changed, just keep the old TypeSourceInfo.
if (NewTypeArg == TypeArg) {
NewTypeArgInfos.push_back(TypeArgInfo);
continue;
}
NewTypeArgInfos.push_back(
TypeArgBuilder.getTypeSourceInfo(SemaRef.Context, NewTypeArg));
AnyChanged = true;
}
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || AnyChanged) {
// Rebuild the type.
Result = getDerived().RebuildObjCObjectType(
BaseType, TL.getBeginLoc(), TL.getTypeArgsLAngleLoc(), NewTypeArgInfos,
TL.getTypeArgsRAngleLoc(), TL.getProtocolLAngleLoc(),
llvm::makeArrayRef(TL.getTypePtr()->qual_begin(), TL.getNumProtocols()),
TL.getProtocolLocs(), TL.getProtocolRAngleLoc());
if (Result.isNull())
return QualType();
}
ObjCObjectTypeLoc NewT = TLB.push<ObjCObjectTypeLoc>(Result);
NewT.setHasBaseTypeAsWritten(true);
NewT.setTypeArgsLAngleLoc(TL.getTypeArgsLAngleLoc());
for (unsigned i = 0, n = TL.getNumTypeArgs(); i != n; ++i)
NewT.setTypeArgTInfo(i, NewTypeArgInfos[i]);
NewT.setTypeArgsRAngleLoc(TL.getTypeArgsRAngleLoc());
NewT.setProtocolLAngleLoc(TL.getProtocolLAngleLoc());
for (unsigned i = 0, n = TL.getNumProtocols(); i != n; ++i)
NewT.setProtocolLoc(i, TL.getProtocolLoc(i));
NewT.setProtocolRAngleLoc(TL.getProtocolRAngleLoc());
return Result;
}
template<typename Derived>
QualType
TreeTransform<Derived>::TransformObjCObjectPointerType(TypeLocBuilder &TLB,
ObjCObjectPointerTypeLoc TL) {
QualType PointeeType = getDerived().TransformType(TLB, TL.getPointeeLoc());
if (PointeeType.isNull())
return QualType();
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() ||
PointeeType != TL.getPointeeLoc().getType()) {
Result = getDerived().RebuildObjCObjectPointerType(PointeeType,
TL.getStarLoc());
if (Result.isNull())
return QualType();
}
ObjCObjectPointerTypeLoc NewT = TLB.push<ObjCObjectPointerTypeLoc>(Result);
NewT.setStarLoc(TL.getStarLoc());
return Result;
}
//===----------------------------------------------------------------------===//
// Statement transformation
//===----------------------------------------------------------------------===//
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformNullStmt(NullStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCompoundStmt(CompoundStmt *S) {
return getDerived().TransformCompoundStmt(S, false);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCompoundStmt(CompoundStmt *S,
bool IsStmtExpr) {
Sema::CompoundScopeRAII CompoundScope(getSema());
const Stmt *ExprResult = S->getStmtExprResult();
bool SubStmtInvalid = false;
bool SubStmtChanged = false;
SmallVector<Stmt*, 8> Statements;
for (auto *B : S->body()) {
StmtResult Result = getDerived().TransformStmt(
B, IsStmtExpr && B == ExprResult ? SDK_StmtExprResult : SDK_Discarded);
if (Result.isInvalid()) {
// Immediately fail if this was a DeclStmt, since it's very
// likely that this will cause problems for future statements.
if (isa<DeclStmt>(B))
return StmtError();
// Otherwise, just keep processing substatements and fail later.
SubStmtInvalid = true;
continue;
}
SubStmtChanged = SubStmtChanged || Result.get() != B;
Statements.push_back(Result.getAs<Stmt>());
}
if (SubStmtInvalid)
return StmtError();
if (!getDerived().AlwaysRebuild() &&
!SubStmtChanged)
return S;
return getDerived().RebuildCompoundStmt(S->getLBracLoc(),
Statements,
S->getRBracLoc(),
IsStmtExpr);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCaseStmt(CaseStmt *S) {
ExprResult LHS, RHS;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
// Transform the left-hand case value.
LHS = getDerived().TransformExpr(S->getLHS());
LHS = SemaRef.ActOnCaseExpr(S->getCaseLoc(), LHS);
if (LHS.isInvalid())
return StmtError();
// Transform the right-hand case value (for the GNU case-range extension).
RHS = getDerived().TransformExpr(S->getRHS());
RHS = SemaRef.ActOnCaseExpr(S->getCaseLoc(), RHS);
if (RHS.isInvalid())
return StmtError();
}
// Build the case statement.
// Case statements are always rebuilt so that they will be attached to their
// transformed switch statement.
StmtResult Case = getDerived().RebuildCaseStmt(S->getCaseLoc(),
LHS.get(),
S->getEllipsisLoc(),
RHS.get(),
S->getColonLoc());
if (Case.isInvalid())
return StmtError();
// Transform the statement following the case
StmtResult SubStmt =
getDerived().TransformStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// Attach the body to the case statement
return getDerived().RebuildCaseStmtBody(Case.get(), SubStmt.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformDefaultStmt(DefaultStmt *S) {
// Transform the statement following the default case
StmtResult SubStmt =
getDerived().TransformStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// Default statements are always rebuilt
return getDerived().RebuildDefaultStmt(S->getDefaultLoc(), S->getColonLoc(),
SubStmt.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformLabelStmt(LabelStmt *S, StmtDiscardKind SDK) {
StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK);
if (SubStmt.isInvalid())
return StmtError();
Decl *LD = getDerived().TransformDecl(S->getDecl()->getLocation(),
S->getDecl());
if (!LD)
return StmtError();
// If we're transforming "in-place" (we're not creating new local
// declarations), assume we're replacing the old label statement
// and clear out the reference to it.
if (LD == S->getDecl())
S->getDecl()->setStmt(nullptr);
// FIXME: Pass the real colon location in.
return getDerived().RebuildLabelStmt(S->getIdentLoc(),
cast<LabelDecl>(LD), SourceLocation(),
SubStmt.get());
}
template <typename Derived>
const Attr *TreeTransform<Derived>::TransformAttr(const Attr *R) {
if (!R)
return R;
switch (R->getKind()) {
// Transform attributes with a pragma spelling by calling TransformXXXAttr.
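// For illustration (assuming LoopHint is listed as a PRAGMA_SPELLING_ATTR in
// AttrList.inc), the macro below expands to roughly:
//   case attr::LoopHint:
//     return getDerived().TransformLoopHintAttr(cast<LoopHintAttr>(R));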
#define ATTR(X)
#define PRAGMA_SPELLING_ATTR(X) \
case attr::X: \
return getDerived().Transform##X##Attr(cast<X##Attr>(R));
#include "clang/Basic/AttrList.inc"
default:
return R;
}
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformAttributedStmt(AttributedStmt *S,
StmtDiscardKind SDK) {
bool AttrsChanged = false;
SmallVector<const Attr *, 1> Attrs;
// Visit attributes and keep track if any are transformed.
for (const auto *I : S->getAttrs()) {
const Attr *R = getDerived().TransformAttr(I);
AttrsChanged |= (I != R);
if (R)
Attrs.push_back(R);
}
StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK);
if (SubStmt.isInvalid())
return StmtError();
if (SubStmt.get() == S->getSubStmt() && !AttrsChanged)
return S;
// If transforming every attribute in the statement failed, don't build an
// AttributedStmt without attributes; just return the substatement.
if (Attrs.empty())
return SubStmt;
return getDerived().RebuildAttributedStmt(S->getAttrLoc(), Attrs,
SubStmt.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformIfStmt(IfStmt *S) {
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getIfLoc(), S->getConditionVariable(), S->getCond(),
S->isConstexpr() ? Sema::ConditionKind::ConstexprIf
: Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// If this is a constexpr if, determine which arm we should instantiate.
llvm::Optional<bool> ConstexprConditionValue;
if (S->isConstexpr())
ConstexprConditionValue = Cond.getKnownValue();
// Transform the "then" branch.
StmtResult Then;
if (!ConstexprConditionValue || *ConstexprConditionValue) {
Then = getDerived().TransformStmt(S->getThen());
if (Then.isInvalid())
return StmtError();
} else {
Then = new (getSema().Context) NullStmt(S->getThen()->getBeginLoc());
}
// Transform the "else" branch.
StmtResult Else;
if (!ConstexprConditionValue || !*ConstexprConditionValue) {
Else = getDerived().TransformStmt(S->getElse());
if (Else.isInvalid())
return StmtError();
}
if (!getDerived().AlwaysRebuild() &&
Init.get() == S->getInit() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Then.get() == S->getThen() &&
Else.get() == S->getElse())
return S;
return getDerived().RebuildIfStmt(
S->getIfLoc(), S->isConstexpr(), S->getLParenLoc(), Cond,
S->getRParenLoc(), Init.get(), Then.get(), S->getElseLoc(), Else.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformSwitchStmt(SwitchStmt *S) {
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// Transform the condition.
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getSwitchLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Switch);
if (Cond.isInvalid())
return StmtError();
// Rebuild the switch statement.
StmtResult Switch =
getDerived().RebuildSwitchStmtStart(S->getSwitchLoc(), S->getLParenLoc(),
Init.get(), Cond, S->getRParenLoc());
if (Switch.isInvalid())
return StmtError();
// Transform the body of the switch statement.
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// Complete the switch statement.
return getDerived().RebuildSwitchStmtBody(S->getSwitchLoc(), Switch.get(),
Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) {
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getWhileLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Body.get() == S->getBody())
return Owned(S);
return getDerived().RebuildWhileStmt(S->getWhileLoc(), S->getLParenLoc(),
Cond, S->getRParenLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformDoStmt(DoStmt *S) {
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// Transform the condition
ExprResult Cond = getDerived().TransformExpr(S->getCond());
if (Cond.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == S->getCond() &&
Body.get() == S->getBody())
return S;
return getDerived().RebuildDoStmt(S->getDoLoc(), Body.get(), S->getWhileLoc(),
/*FIXME:*/S->getWhileLoc(), Cond.get(),
S->getRParenLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
if (getSema().getLangOpts().OpenMP)
getSema().startOpenMPLoop();
// Transform the initialization statement
StmtResult Init = getDerived().TransformStmt(S->getInit());
if (Init.isInvalid())
return StmtError();
// In an OpenMP loop region, the loop control variable must be captured and be
// private. Perform analysis of the first part (if any).
if (getSema().getLangOpts().OpenMP && Init.isUsable())
getSema().ActOnOpenMPLoopInitialization(S->getForLoc(), Init.get());
// Transform the condition
Sema::ConditionResult Cond = getDerived().TransformCondition(
S->getForLoc(), S->getConditionVariable(), S->getCond(),
Sema::ConditionKind::Boolean);
if (Cond.isInvalid())
return StmtError();
// Transform the increment
ExprResult Inc = getDerived().TransformExpr(S->getInc());
if (Inc.isInvalid())
return StmtError();
Sema::FullExprArg FullInc(getSema().MakeFullDiscardedValueExpr(Inc.get()));
if (S->getInc() && !FullInc.get())
return StmtError();
// Transform the body
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() &&
Init.get() == S->getInit() &&
Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Inc.get() == S->getInc() &&
Body.get() == S->getBody())
return S;
return getDerived().RebuildForStmt(S->getForLoc(), S->getLParenLoc(),
Init.get(), Cond, FullInc,
S->getRParenLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformGotoStmt(GotoStmt *S) {
Decl *LD = getDerived().TransformDecl(S->getLabel()->getLocation(),
S->getLabel());
if (!LD)
return StmtError();
// Goto statements must always be rebuilt, to resolve the label.
return getDerived().RebuildGotoStmt(S->getGotoLoc(), S->getLabelLoc(),
cast<LabelDecl>(LD));
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformIndirectGotoStmt(IndirectGotoStmt *S) {
ExprResult Target = getDerived().TransformExpr(S->getTarget());
if (Target.isInvalid())
return StmtError();
Target = SemaRef.MaybeCreateExprWithCleanups(Target.get());
if (!getDerived().AlwaysRebuild() &&
Target.get() == S->getTarget())
return S;
return getDerived().RebuildIndirectGotoStmt(S->getGotoLoc(), S->getStarLoc(),
Target.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformContinueStmt(ContinueStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformBreakStmt(BreakStmt *S) {
return S;
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformReturnStmt(ReturnStmt *S) {
ExprResult Result = getDerived().TransformInitializer(S->getRetValue(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return StmtError();
// FIXME: We always rebuild the return statement because there is no way
// to tell whether the return type of the function has changed.
return getDerived().RebuildReturnStmt(S->getReturnLoc(), Result.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformDeclStmt(DeclStmt *S) {
bool DeclChanged = false;
SmallVector<Decl *, 4> Decls;
for (auto *D : S->decls()) {
Decl *Transformed = getDerived().TransformDefinition(D->getLocation(), D);
if (!Transformed)
return StmtError();
if (Transformed != D)
DeclChanged = true;
Decls.push_back(Transformed);
}
if (!getDerived().AlwaysRebuild() && !DeclChanged)
return S;
return getDerived().RebuildDeclStmt(Decls, S->getBeginLoc(), S->getEndLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
SmallVector<Expr*, 8> Constraints;
SmallVector<Expr*, 8> Exprs;
SmallVector<IdentifierInfo *, 4> Names;
ExprResult AsmString;
SmallVector<Expr*, 8> Clobbers;
bool ExprsChanged = false;
// Go through the outputs.
for (unsigned I = 0, E = S->getNumOutputs(); I != E; ++I) {
Names.push_back(S->getOutputIdentifier(I));
// No need to transform the constraint literal.
Constraints.push_back(S->getOutputConstraintLiteral(I));
// Transform the output expr.
Expr *OutputExpr = S->getOutputExpr(I);
ExprResult Result = getDerived().TransformExpr(OutputExpr);
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != OutputExpr;
Exprs.push_back(Result.get());
}
// Go through the inputs.
for (unsigned I = 0, E = S->getNumInputs(); I != E; ++I) {
Names.push_back(S->getInputIdentifier(I));
// No need to transform the constraint literal.
Constraints.push_back(S->getInputConstraintLiteral(I));
// Transform the input expr.
Expr *InputExpr = S->getInputExpr(I);
ExprResult Result = getDerived().TransformExpr(InputExpr);
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != InputExpr;
Exprs.push_back(Result.get());
}
// Go through the Labels.
for (unsigned I = 0, E = S->getNumLabels(); I != E; ++I) {
Names.push_back(S->getLabelIdentifier(I));
ExprResult Result = getDerived().TransformExpr(S->getLabelExpr(I));
if (Result.isInvalid())
return StmtError();
ExprsChanged |= Result.get() != S->getLabelExpr(I);
Exprs.push_back(Result.get());
}
if (!getDerived().AlwaysRebuild() && !ExprsChanged)
return S;
// Go through the clobbers.
for (unsigned I = 0, E = S->getNumClobbers(); I != E; ++I)
Clobbers.push_back(S->getClobberStringLiteral(I));
// No need to transform the asm string literal.
AsmString = S->getAsmString();
return getDerived().RebuildGCCAsmStmt(S->getAsmLoc(), S->isSimple(),
S->isVolatile(), S->getNumOutputs(),
S->getNumInputs(), Names.data(),
Constraints, Exprs, AsmString.get(),
Clobbers, S->getNumLabels(),
S->getRParenLoc());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformMSAsmStmt(MSAsmStmt *S) {
ArrayRef<Token> AsmToks =
llvm::makeArrayRef(S->getAsmToks(), S->getNumAsmToks());
bool HadError = false, HadChange = false;
ArrayRef<Expr*> SrcExprs = S->getAllExprs();
SmallVector<Expr*, 8> TransformedExprs;
TransformedExprs.reserve(SrcExprs.size());
for (unsigned i = 0, e = SrcExprs.size(); i != e; ++i) {
ExprResult Result = getDerived().TransformExpr(SrcExprs[i]);
if (!Result.isUsable()) {
HadError = true;
} else {
HadChange |= (Result.get() != SrcExprs[i]);
TransformedExprs.push_back(Result.get());
}
}
if (HadError) return StmtError();
if (!HadChange && !getDerived().AlwaysRebuild())
return Owned(S);
return getDerived().RebuildMSAsmStmt(S->getAsmLoc(), S->getLBraceLoc(),
AsmToks, S->getAsmString(),
S->getNumOutputs(), S->getNumInputs(),
S->getAllConstraints(), S->getClobbers(),
TransformedExprs, S->getEndLoc());
}
// C++ Coroutines TS
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
auto *ScopeInfo = SemaRef.getCurFunction();
auto *FD = cast<FunctionDecl>(SemaRef.CurContext);
assert(FD && ScopeInfo && !ScopeInfo->CoroutinePromise &&
ScopeInfo->NeedsCoroutineSuspends &&
ScopeInfo->CoroutineSuspends.first == nullptr &&
ScopeInfo->CoroutineSuspends.second == nullptr &&
"expected clean scope info");
// Set that we have (possibly-invalid) suspend points before we do anything
// that may fail.
ScopeInfo->setNeedsCoroutineSuspends(false);
// We re-build the coroutine promise object (and the coroutine parameters its
// type and constructor depend on) based on the types used in our current
// function. We must do so, and set it on the current FunctionScopeInfo,
// before attempting to transform the other parts of the coroutine body
// statement, such as the implicit suspend statements (because those
// statements reference the FunctionScopeInfo::CoroutinePromise).
if (!SemaRef.buildCoroutineParameterMoves(FD->getLocation()))
return StmtError();
auto *Promise = SemaRef.buildCoroutinePromise(FD->getLocation());
if (!Promise)
return StmtError();
getDerived().transformedLocalDecl(S->getPromiseDecl(), {Promise});
ScopeInfo->CoroutinePromise = Promise;
// Transform the implicit coroutine statements constructed using dependent
// types during the previous parse: initial and final suspensions, the return
// object, and others. We also transform the coroutine function's body.
StmtResult InitSuspend = getDerived().TransformStmt(S->getInitSuspendStmt());
if (InitSuspend.isInvalid())
return StmtError();
StmtResult FinalSuspend =
getDerived().TransformStmt(S->getFinalSuspendStmt());
if (FinalSuspend.isInvalid() ||
!SemaRef.checkFinalSuspendNoThrow(FinalSuspend.get()))
return StmtError();
ScopeInfo->setCoroutineSuspends(InitSuspend.get(), FinalSuspend.get());
assert(isa<Expr>(InitSuspend.get()) && isa<Expr>(FinalSuspend.get()));
StmtResult BodyRes = getDerived().TransformStmt(S->getBody());
if (BodyRes.isInvalid())
return StmtError();
CoroutineStmtBuilder Builder(SemaRef, *FD, *ScopeInfo, BodyRes.get());
if (Builder.isInvalid())
return StmtError();
Expr *ReturnObject = S->getReturnValueInit();
assert(ReturnObject && "the return object is expected to be valid");
ExprResult Res = getDerived().TransformInitializer(ReturnObject,
/*NotCopyInit*/ false);
if (Res.isInvalid())
return StmtError();
Builder.ReturnValue = Res.get();
// If during the previous parse the coroutine still had a dependent promise
// type, we may need to build some implicit coroutine statements
// (such as exception and fallthrough handlers) for the first time.
if (S->hasDependentPromiseType()) {
// We can only build these statements, however, if the current promise type
// is not dependent.
if (!Promise->getType()->isDependentType()) {
assert(!S->getFallthroughHandler() && !S->getExceptionHandler() &&
!S->getReturnStmtOnAllocFailure() && !S->getDeallocate() &&
"these nodes should not have been built yet");
if (!Builder.buildDependentStatements())
return StmtError();
}
} else {
if (auto *OnFallthrough = S->getFallthroughHandler()) {
StmtResult Res = getDerived().TransformStmt(OnFallthrough);
if (Res.isInvalid())
return StmtError();
Builder.OnFallthrough = Res.get();
}
if (auto *OnException = S->getExceptionHandler()) {
StmtResult Res = getDerived().TransformStmt(OnException);
if (Res.isInvalid())
return StmtError();
Builder.OnException = Res.get();
}
if (auto *OnAllocFailure = S->getReturnStmtOnAllocFailure()) {
StmtResult Res = getDerived().TransformStmt(OnAllocFailure);
if (Res.isInvalid())
return StmtError();
Builder.ReturnStmtOnAllocFailure = Res.get();
}
// Transform any additional statements we may have already built
assert(S->getAllocate() && S->getDeallocate() &&
"allocation and deallocation calls must already be built");
ExprResult AllocRes = getDerived().TransformExpr(S->getAllocate());
if (AllocRes.isInvalid())
return StmtError();
Builder.Allocate = AllocRes.get();
ExprResult DeallocRes = getDerived().TransformExpr(S->getDeallocate());
if (DeallocRes.isInvalid())
return StmtError();
Builder.Deallocate = DeallocRes.get();
assert(S->getResultDecl() && "ResultDecl must already be built");
StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
if (ResultDecl.isInvalid())
return StmtError();
Builder.ResultDecl = ResultDecl.get();
if (auto *ReturnStmt = S->getReturnStmt()) {
StmtResult Res = getDerived().TransformStmt(ReturnStmt);
if (Res.isInvalid())
return StmtError();
Builder.ReturnStmt = Res.get();
}
}
return getDerived().RebuildCoroutineBodyStmt(Builder);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCoreturnStmt(CoreturnStmt *S) {
ExprResult Result = getDerived().TransformInitializer(S->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return StmtError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoreturnStmt(S->getKeywordLoc(), Result.get(),
S->isImplicit());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCoawaitExpr(CoawaitExpr *E) {
ExprResult Result = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoawaitExpr(E->getKeywordLoc(), Result.get(),
E->isImplicit());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformDependentCoawaitExpr(DependentCoawaitExpr *E) {
ExprResult OperandResult = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/ false);
if (OperandResult.isInvalid())
return ExprError();
ExprResult LookupResult = getDerived().TransformUnresolvedLookupExpr(
E->getOperatorCoawaitLookup());
if (LookupResult.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildDependentCoawaitExpr(
E->getKeywordLoc(), OperandResult.get(),
cast<UnresolvedLookupExpr>(LookupResult.get()));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCoyieldExpr(CoyieldExpr *E) {
ExprResult Result = getDerived().TransformInitializer(E->getOperand(),
/*NotCopyInit*/false);
if (Result.isInvalid())
return ExprError();
// Always rebuild; we don't know if this needs to be injected into a new
// context or if the promise type has changed.
return getDerived().RebuildCoyieldExpr(E->getKeywordLoc(), Result.get());
}
// Objective-C Statements.
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtTryStmt(ObjCAtTryStmt *S) {
// Transform the body of the @try.
StmtResult TryBody = getDerived().TransformStmt(S->getTryBody());
if (TryBody.isInvalid())
return StmtError();
// Transform the @catch statements (if present).
bool AnyCatchChanged = false;
SmallVector<Stmt*, 8> CatchStmts;
for (unsigned I = 0, N = S->getNumCatchStmts(); I != N; ++I) {
StmtResult Catch = getDerived().TransformStmt(S->getCatchStmt(I));
if (Catch.isInvalid())
return StmtError();
if (Catch.get() != S->getCatchStmt(I))
AnyCatchChanged = true;
CatchStmts.push_back(Catch.get());
}
// Transform the @finally statement (if present).
StmtResult Finally;
if (S->getFinallyStmt()) {
Finally = getDerived().TransformStmt(S->getFinallyStmt());
if (Finally.isInvalid())
return StmtError();
}
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
TryBody.get() == S->getTryBody() &&
!AnyCatchChanged &&
Finally.get() == S->getFinallyStmt())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtTryStmt(S->getAtTryLoc(), TryBody.get(),
CatchStmts, Finally.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtCatchStmt(ObjCAtCatchStmt *S) {
// Transform the @catch parameter, if there is one.
VarDecl *Var = nullptr;
if (VarDecl *FromVar = S->getCatchParamDecl()) {
TypeSourceInfo *TSInfo = nullptr;
if (FromVar->getTypeSourceInfo()) {
TSInfo = getDerived().TransformType(FromVar->getTypeSourceInfo());
if (!TSInfo)
return StmtError();
}
QualType T;
if (TSInfo)
T = TSInfo->getType();
else {
T = getDerived().TransformType(FromVar->getType());
if (T.isNull())
return StmtError();
}
Var = getDerived().RebuildObjCExceptionDecl(FromVar, TSInfo, T);
if (!Var)
return StmtError();
}
StmtResult Body = getDerived().TransformStmt(S->getCatchBody());
if (Body.isInvalid())
return StmtError();
return getDerived().RebuildObjCAtCatchStmt(S->getAtCatchLoc(),
S->getRParenLoc(),
Var, Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getFinallyBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Body.get() == S->getFinallyBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtFinallyStmt(S->getAtFinallyLoc(),
Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtThrowStmt(ObjCAtThrowStmt *S) {
ExprResult Operand;
if (S->getThrowExpr()) {
Operand = getDerived().TransformExpr(S->getThrowExpr());
if (Operand.isInvalid())
return StmtError();
}
if (!getDerived().AlwaysRebuild() &&
Operand.get() == S->getThrowExpr())
return S;
return getDerived().RebuildObjCAtThrowStmt(S->getThrowLoc(), Operand.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAtSynchronizedStmt(
ObjCAtSynchronizedStmt *S) {
// Transform the object we are locking.
ExprResult Object = getDerived().TransformExpr(S->getSynchExpr());
if (Object.isInvalid())
return StmtError();
Object =
getDerived().RebuildObjCAtSynchronizedOperand(S->getAtSynchronizedLoc(),
Object.get());
if (Object.isInvalid())
return StmtError();
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getSynchBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain the current statement.
if (!getDerived().AlwaysRebuild() &&
Object.get() == S->getSynchExpr() &&
Body.get() == S->getSynchBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCAtSynchronizedStmt(S->getAtSynchronizedLoc(),
Object.get(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCAutoreleasePoolStmt(
ObjCAutoreleasePoolStmt *S) {
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getSubStmt());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Body.get() == S->getSubStmt())
return S;
// Build a new statement.
return getDerived().RebuildObjCAutoreleasePoolStmt(
S->getAtLoc(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformObjCForCollectionStmt(
ObjCForCollectionStmt *S) {
// Transform the element statement.
StmtResult Element =
getDerived().TransformStmt(S->getElement(), SDK_NotDiscarded);
if (Element.isInvalid())
return StmtError();
// Transform the collection expression.
ExprResult Collection = getDerived().TransformExpr(S->getCollection());
if (Collection.isInvalid())
return StmtError();
// Transform the body.
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// If nothing changed, just retain this statement.
if (!getDerived().AlwaysRebuild() &&
Element.get() == S->getElement() &&
Collection.get() == S->getCollection() &&
Body.get() == S->getBody())
return S;
// Build a new statement.
return getDerived().RebuildObjCForCollectionStmt(S->getForLoc(),
Element.get(),
Collection.get(),
S->getRParenLoc(),
Body.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformCXXCatchStmt(CXXCatchStmt *S) {
// Transform the exception declaration, if any.
VarDecl *Var = nullptr;
if (VarDecl *ExceptionDecl = S->getExceptionDecl()) {
TypeSourceInfo *T =
getDerived().TransformType(ExceptionDecl->getTypeSourceInfo());
if (!T)
return StmtError();
Var = getDerived().RebuildExceptionDecl(
ExceptionDecl, T, ExceptionDecl->getInnerLocStart(),
ExceptionDecl->getLocation(), ExceptionDecl->getIdentifier());
if (!Var || Var->isInvalidDecl())
return StmtError();
}
// Transform the actual exception handler.
StmtResult Handler = getDerived().TransformStmt(S->getHandlerBlock());
if (Handler.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() && !Var &&
Handler.get() == S->getHandlerBlock())
return S;
return getDerived().RebuildCXXCatchStmt(S->getCatchLoc(), Var, Handler.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformCXXTryStmt(CXXTryStmt *S) {
// Transform the try block itself.
StmtResult TryBlock = getDerived().TransformCompoundStmt(S->getTryBlock());
if (TryBlock.isInvalid())
return StmtError();
// Transform the handlers.
bool HandlerChanged = false;
SmallVector<Stmt *, 8> Handlers;
for (unsigned I = 0, N = S->getNumHandlers(); I != N; ++I) {
StmtResult Handler = getDerived().TransformCXXCatchStmt(S->getHandler(I));
if (Handler.isInvalid())
return StmtError();
HandlerChanged = HandlerChanged || Handler.get() != S->getHandler(I);
Handlers.push_back(Handler.getAs<Stmt>());
}
if (!getDerived().AlwaysRebuild() && TryBlock.get() == S->getTryBlock() &&
!HandlerChanged)
return S;
return getDerived().RebuildCXXTryStmt(S->getTryLoc(), TryBlock.get(),
Handlers);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
StmtResult Init =
S->getInit() ? getDerived().TransformStmt(S->getInit()) : StmtResult();
if (Init.isInvalid())
return StmtError();
StmtResult Range = getDerived().TransformStmt(S->getRangeStmt());
if (Range.isInvalid())
return StmtError();
StmtResult Begin = getDerived().TransformStmt(S->getBeginStmt());
if (Begin.isInvalid())
return StmtError();
StmtResult End = getDerived().TransformStmt(S->getEndStmt());
if (End.isInvalid())
return StmtError();
ExprResult Cond = getDerived().TransformExpr(S->getCond());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
Cond = SemaRef.CheckBooleanCondition(S->getColonLoc(), Cond.get());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
Cond = SemaRef.MaybeCreateExprWithCleanups(Cond.get());
ExprResult Inc = getDerived().TransformExpr(S->getInc());
if (Inc.isInvalid())
return StmtError();
if (Inc.get())
Inc = SemaRef.MaybeCreateExprWithCleanups(Inc.get());
StmtResult LoopVar = getDerived().TransformStmt(S->getLoopVarStmt());
if (LoopVar.isInvalid())
return StmtError();
StmtResult NewStmt = S;
if (getDerived().AlwaysRebuild() ||
Init.get() != S->getInit() ||
Range.get() != S->getRangeStmt() ||
Begin.get() != S->getBeginStmt() ||
End.get() != S->getEndStmt() ||
Cond.get() != S->getCond() ||
Inc.get() != S->getInc() ||
LoopVar.get() != S->getLoopVarStmt()) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(), Init.get(),
S->getColonLoc(), Range.get(),
Begin.get(), End.get(),
Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid() && LoopVar.get() != S->getLoopVarStmt()) {
// Might not have attached any initializer to the loop variable.
getSema().ActOnInitializerError(
cast<DeclStmt>(LoopVar.get())->getSingleDecl());
return StmtError();
}
}
StmtResult Body = getDerived().TransformStmt(S->getBody());
if (Body.isInvalid())
return StmtError();
// If the body changed but we did not rebuild the for-range statement above,
// rebuild it now so we have a new statement to attach the body to.
if (Body.get() != S->getBody() && NewStmt.get() == S) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(), Init.get(),
S->getColonLoc(), Range.get(),
Begin.get(), End.get(),
Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid())
return StmtError();
}
if (NewStmt.get() == S)
return S;
return FinishCXXForRangeStmt(NewStmt.get(), Body.get());
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformMSDependentExistsStmt(
MSDependentExistsStmt *S) {
// Transform the nested-name-specifier, if any.
NestedNameSpecifierLoc QualifierLoc;
if (S->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(S->getQualifierLoc());
if (!QualifierLoc)
return StmtError();
}
// Transform the declaration name.
DeclarationNameInfo NameInfo = S->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return StmtError();
}
// Check whether anything changed.
if (!getDerived().AlwaysRebuild() &&
QualifierLoc == S->getQualifierLoc() &&
NameInfo.getName() == S->getNameInfo().getName())
return S;
// Determine whether this name exists, if we can.
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
bool Dependent = false;
switch (getSema().CheckMicrosoftIfExistsSymbol(/*S=*/nullptr, SS, NameInfo)) {
case Sema::IER_Exists:
if (S->isIfExists())
break;
return new (getSema().Context) NullStmt(S->getKeywordLoc());
case Sema::IER_DoesNotExist:
if (S->isIfNotExists())
break;
return new (getSema().Context) NullStmt(S->getKeywordLoc());
case Sema::IER_Dependent:
Dependent = true;
break;
case Sema::IER_Error:
return StmtError();
}
// We need to continue with the instantiation, so do so now.
StmtResult SubStmt = getDerived().TransformCompoundStmt(S->getSubStmt());
if (SubStmt.isInvalid())
return StmtError();
// If we have resolved the name, just transform to the substatement.
if (!Dependent)
return SubStmt;
// The name is still dependent, so build a dependent expression again.
return getDerived().RebuildMSDependentExistsStmt(S->getKeywordLoc(),
S->isIfExists(),
QualifierLoc,
NameInfo,
SubStmt.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMSPropertyRefExpr(MSPropertyRefExpr *E) {
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
MSPropertyDecl *PD = cast_or_null<MSPropertyDecl>(
getDerived().TransformDecl(E->getMemberLoc(), E->getPropertyDecl()));
if (!PD)
return ExprError();
ExprResult Base = getDerived().TransformExpr(E->getBaseExpr());
if (Base.isInvalid())
return ExprError();
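// Build the new property reference directly from the transformed base,
// qualifier, and property declaration.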
return new (SemaRef.getASTContext())
MSPropertyRefExpr(Base.get(), PD, E->isArrow(),
SemaRef.getASTContext().PseudoObjectTy, VK_LValue,
QualifierLoc, E->getMemberLoc());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformMSPropertySubscriptExpr(
MSPropertySubscriptExpr *E) {
auto BaseRes = getDerived().TransformExpr(E->getBase());
if (BaseRes.isInvalid())
return ExprError();
auto IdxRes = getDerived().TransformExpr(E->getIdx());
if (IdxRes.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
BaseRes.get() == E->getBase() &&
IdxRes.get() == E->getIdx())
return E;
return getDerived().RebuildArraySubscriptExpr(
BaseRes.get(), SourceLocation(), IdxRes.get(), E->getRBracketLoc());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHTryStmt(SEHTryStmt *S) {
StmtResult TryBlock = getDerived().TransformCompoundStmt(S->getTryBlock());
if (TryBlock.isInvalid())
return StmtError();
StmtResult Handler = getDerived().TransformSEHHandler(S->getHandler());
if (Handler.isInvalid())
return StmtError();
if (!getDerived().AlwaysRebuild() && TryBlock.get() == S->getTryBlock() &&
Handler.get() == S->getHandler())
return S;
return getDerived().RebuildSEHTryStmt(S->getIsCXXTry(), S->getTryLoc(),
TryBlock.get(), Handler.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHFinallyStmt(SEHFinallyStmt *S) {
StmtResult Block = getDerived().TransformCompoundStmt(S->getBlock());
if (Block.isInvalid())
return StmtError();
return getDerived().RebuildSEHFinallyStmt(S->getFinallyLoc(), Block.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHExceptStmt(SEHExceptStmt *S) {
ExprResult FilterExpr = getDerived().TransformExpr(S->getFilterExpr());
if (FilterExpr.isInvalid())
return StmtError();
StmtResult Block = getDerived().TransformCompoundStmt(S->getBlock());
if (Block.isInvalid())
return StmtError();
return getDerived().RebuildSEHExceptStmt(S->getExceptLoc(), FilterExpr.get(),
Block.get());
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformSEHHandler(Stmt *Handler) {
if (isa<SEHFinallyStmt>(Handler))
return getDerived().TransformSEHFinallyStmt(cast<SEHFinallyStmt>(Handler));
else
return getDerived().TransformSEHExceptStmt(cast<SEHExceptStmt>(Handler));
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformSEHLeaveStmt(SEHLeaveStmt *S) {
return S;
}
//===----------------------------------------------------------------------===//
// OpenMP directive transformation
//===----------------------------------------------------------------------===//
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCanonicalLoop(OMPCanonicalLoop *L) {
// OMPCanonicalLoops are eliminated during transformation, since they will be
// recomputed by semantic analysis of the associated OMPLoopBasedDirective
// after transformation.
return getDerived().TransformStmt(L->getLoopStmt());
}
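// Common worker for all OpenMP executable directives: transform each clause,
// re-enter the OpenMP region to transform the associated statement (using the
// raw statement for most directives), transform the directive name for
// 'critical' and the cancel region for cancellation directives, and finally
// rebuild the directive from the transformed pieces.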
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
OMPExecutableDirective *D) {
// Transform the clauses
llvm::SmallVector<OMPClause *, 16> TClauses;
ArrayRef<OMPClause *> Clauses = D->clauses();
TClauses.reserve(Clauses.size());
for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
I != E; ++I) {
if (*I) {
getDerived().getSema().StartOpenMPClause((*I)->getClauseKind());
OMPClause *Clause = getDerived().TransformOMPClause(*I);
getDerived().getSema().EndOpenMPClause();
if (Clause)
TClauses.push_back(Clause);
} else {
TClauses.push_back(nullptr);
}
}
StmtResult AssociatedStmt;
if (D->hasAssociatedStmt() && D->getAssociatedStmt()) {
getDerived().getSema().ActOnOpenMPRegionStart(D->getDirectiveKind(),
/*CurScope=*/nullptr);
StmtResult Body;
{
Sema::CompoundScopeRAII CompoundScope(getSema());
Stmt *CS;
if (D->getDirectiveKind() == OMPD_atomic ||
D->getDirectiveKind() == OMPD_critical ||
D->getDirectiveKind() == OMPD_section ||
D->getDirectiveKind() == OMPD_master)
CS = D->getAssociatedStmt();
else
CS = D->getRawStmt();
Body = getDerived().TransformStmt(CS);
if (Body.isUsable() && isOpenMPLoopDirective(D->getDirectiveKind()) &&
getSema().getLangOpts().OpenMPIRBuilder)
Body = getDerived().RebuildOMPCanonicalLoop(Body.get());
}
AssociatedStmt =
getDerived().getSema().ActOnOpenMPRegionEnd(Body, TClauses);
if (AssociatedStmt.isInvalid()) {
return StmtError();
}
}
if (TClauses.size() != Clauses.size()) {
return StmtError();
}
// Transform the directive name for the 'omp critical' directive.
DeclarationNameInfo DirName;
if (D->getDirectiveKind() == OMPD_critical) {
DirName = cast<OMPCriticalDirective>(D)->getDirectiveName();
DirName = getDerived().TransformDeclarationNameInfo(DirName);
}
OpenMPDirectiveKind CancelRegion = OMPD_unknown;
if (D->getDirectiveKind() == OMPD_cancellation_point) {
CancelRegion = cast<OMPCancellationPointDirective>(D)->getCancelRegion();
} else if (D->getDirectiveKind() == OMPD_cancel) {
CancelRegion = cast<OMPCancelDirective>(D)->getCancelRegion();
}
return getDerived().RebuildOMPExecutableDirective(
D->getDirectiveKind(), DirName, CancelRegion, TClauses,
AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc());
}
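// The per-directive transforms below all follow the same pattern: start a
// data-sharing-attributes (DSA) block for the directive kind, delegate to
// TransformOMPExecutableDirective, and end the DSA block with the result.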
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPParallelDirective(OMPParallelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSimdDirective(OMPSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTileDirective(OMPTileDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPUnrollDirective(OMPUnrollDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_for, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPForSimdDirective(OMPForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_for_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSectionsDirective(OMPSectionsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_sections, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSectionDirective(OMPSectionDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_section, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPSingleDirective(OMPSingleDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_single, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPMasterDirective(OMPMasterDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCriticalDirective(OMPCriticalDirective *D) {
getDerived().getSema().StartOpenMPDSABlock(
OMPD_critical, D->getDirectiveName(), nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelForDirective(
OMPParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelForSimdDirective(
OMPParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_for_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelMasterDirective(
OMPParallelMasterDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_master, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelSectionsDirective(
OMPParallelSectionsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel_sections, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskDirective(OMPTaskDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_task, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskyieldDirective(
OMPTaskyieldDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskyield, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPBarrierDirective(OMPBarrierDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_barrier, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskwait, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskgroupDirective(
OMPTaskgroupDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskgroup, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPFlushDirective(OMPFlushDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_flush, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDepobjDirective(OMPDepobjDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_depobj, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPScanDirective(OMPScanDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_scan, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPOrderedDirective(OMPOrderedDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_ordered, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPAtomicDirective(OMPAtomicDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_atomic, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetDirective(OMPTargetDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetDataDirective(
OMPTargetDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_data, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetEnterDataDirective(
OMPTargetEnterDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_enter_data, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetExitDataDirective(
OMPTargetExitDataDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_exit_data, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelDirective(
OMPTargetParallelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_for, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetUpdateDirective(
OMPTargetUpdateDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_update, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTeamsDirective(OMPTeamsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_teams, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPCancellationPointDirective(
OMPCancellationPointDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_cancellation_point, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPCancelDirective(OMPCancelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_cancel, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTaskLoopDirective(OMPTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTaskLoopSimdDirective(
OMPTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_taskloop_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPMasterTaskLoopDirective(
OMPMasterTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPMasterTaskLoopSimdDirective(
OMPMasterTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_master_taskloop_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopDirective(
OMPParallelMasterTaskLoopDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_parallel_master_taskloop, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopSimdDirective(
OMPParallelMasterTaskLoopSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_parallel_master_taskloop_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeDirective(
OMPDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_distribute_parallel_for, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDistributeParallelForSimdDirective(
OMPDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_distribute_parallel_for_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPDistributeSimdDirective(
OMPDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute_simd, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForSimdDirective(
OMPTargetParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_parallel_for_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetSimdDirective(
OMPTargetSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeDirective(
OMPTeamsDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_teams_distribute, DirName,
nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeSimdDirective(
OMPTeamsDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_simd, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeParallelForSimdDirective(
OMPTeamsDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_parallel_for_simd, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_teams_distribute_parallel_for, DirName, nullptr, D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetTeamsDirective(
OMPTargetTeamsDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_target_teams, DirName,
nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::TransformOMPTargetTeamsDistributeDirective(
OMPTargetTeamsDistributeDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute, DirName, nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_parallel_for, DirName, nullptr,
D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult TreeTransform<Derived>::
TransformOMPTargetTeamsDistributeParallelForSimdDirective(
OMPTargetTeamsDistributeParallelForSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr,
D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTargetTeamsDistributeSimdDirective(
OMPTargetTeamsDistributeSimdDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(
OMPD_target_teams_distribute_simd, DirName, nullptr, D->getBeginLoc());
  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPInteropDirective(OMPInteropDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_interop, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPDispatchDirective(OMPDispatchDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_dispatch, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPMaskedDirective(OMPMaskedDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_masked, DirName, nullptr,
D->getBeginLoc());
StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
getDerived().getSema().EndOpenMPDSABlock(Res.get());
return Res;
}
//===----------------------------------------------------------------------===//
// OpenMP clause transformation
//===----------------------------------------------------------------------===//
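// Most clause transforms below follow one of two patterns.  Clauses that
// carry expressions (if, final, num_threads, schedule, ...) transform each
// operand and rebuild the clause so that template-dependent expressions are
// re-checked; for example, the condition in
//
//   template <typename T> void f(T n) {
//   #pragma omp parallel if (n > 0)
//     { /* ... */ }
//   }
//
// is only fully checked once f is instantiated.  Clauses with no operands
// (nowait, untied, mergeable, seq_cst, ...) have nothing that can be
// template-dependent and are returned unchanged.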
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPIfClause(OMPIfClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPIfClause(
C->getNameModifier(), Cond.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getNameModifierLoc(), C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFinalClause(OMPFinalClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPFinalClause(Cond.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumThreadsClause(OMPNumThreadsClause *C) {
ExprResult NumThreads = getDerived().TransformExpr(C->getNumThreads());
if (NumThreads.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumThreadsClause(
NumThreads.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSafelenClause(OMPSafelenClause *C) {
ExprResult E = getDerived().TransformExpr(C->getSafelen());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPSafelenClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAllocatorClause(OMPAllocatorClause *C) {
ExprResult E = getDerived().TransformExpr(C->getAllocator());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPAllocatorClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSimdlenClause(OMPSimdlenClause *C) {
ExprResult E = getDerived().TransformExpr(C->getSimdlen());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPSimdlenClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPSizesClause(OMPSizesClause *C) {
SmallVector<Expr *, 4> TransformedSizes;
TransformedSizes.reserve(C->getNumSizes());
bool Changed = false;
for (Expr *E : C->getSizesRefs()) {
if (!E) {
TransformedSizes.push_back(nullptr);
continue;
}
ExprResult T = getDerived().TransformExpr(E);
if (T.isInvalid())
return nullptr;
if (E != T.get())
Changed = true;
TransformedSizes.push_back(T.get());
}
if (!Changed && !getDerived().AlwaysRebuild())
return C;
return RebuildOMPSizesClause(TransformedSizes, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFullClause(OMPFullClause *C) {
if (!getDerived().AlwaysRebuild())
return C;
return RebuildOMPFullClause(C->getBeginLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
ExprResult T = getDerived().TransformExpr(C->getFactor());
if (T.isInvalid())
return nullptr;
Expr *Factor = T.get();
bool Changed = Factor != C->getFactor();
if (!Changed && !getDerived().AlwaysRebuild())
return C;
return RebuildOMPPartialClause(Factor, C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumForLoops());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPCollapseClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDefaultClause(OMPDefaultClause *C) {
return getDerived().RebuildOMPDefaultClause(
C->getDefaultKind(), C->getDefaultKindKwLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPProcBindClause(OMPProcBindClause *C) {
return getDerived().RebuildOMPProcBindClause(
C->getProcBindKind(), C->getProcBindKindKwLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPScheduleClause(OMPScheduleClause *C) {
ExprResult E = getDerived().TransformExpr(C->getChunkSize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPScheduleClause(
C->getFirstScheduleModifier(), C->getSecondScheduleModifier(),
C->getScheduleKind(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getFirstScheduleModifierLoc(), C->getSecondScheduleModifierLoc(),
C->getScheduleKindLoc(), C->getCommaLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPOrderedClause(OMPOrderedClause *C) {
ExprResult E;
if (auto *Num = C->getNumForLoops()) {
E = getDerived().TransformExpr(Num);
if (E.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPOrderedClause(C->getBeginLoc(), C->getEndLoc(),
C->getLParenLoc(), E.get());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDetachClause(OMPDetachClause *C) {
ExprResult E;
if (Expr *Evt = C->getEventHandler()) {
E = getDerived().TransformExpr(Evt);
if (E.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPDetachClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNowaitClause(OMPNowaitClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPUntiedClause(OMPUntiedClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPMergeableClause(OMPMergeableClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPReadClause(OMPReadClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPWriteClause(OMPWriteClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPUpdateClause(OMPUpdateClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCaptureClause(OMPCaptureClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSeqCstClause(OMPSeqCstClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAcqRelClause(OMPAcqRelClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAcquireClause(OMPAcquireClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPReleaseClause(OMPReleaseClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPRelaxedClause(OMPRelaxedClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPThreadsClause(OMPThreadsClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPSIMDClause(OMPSIMDClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNogroupClause(OMPNogroupClause *C) {
// No need to rebuild this clause, no template-dependent parameters.
return C;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPInitClause(OMPInitClause *C) {
ExprResult IVR = getDerived().TransformExpr(C->getInteropVar());
if (IVR.isInvalid())
return nullptr;
llvm::SmallVector<Expr *, 8> PrefExprs;
PrefExprs.reserve(C->varlist_size() - 1);
for (Expr *E : llvm::drop_begin(C->varlists())) {
ExprResult ER = getDerived().TransformExpr(cast<Expr>(E));
if (ER.isInvalid())
return nullptr;
PrefExprs.push_back(ER.get());
}
return getDerived().RebuildOMPInitClause(
IVR.get(), PrefExprs, C->getIsTarget(), C->getIsTargetSync(),
C->getBeginLoc(), C->getLParenLoc(), C->getVarLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseClause(OMPUseClause *C) {
ExprResult ER = getDerived().TransformExpr(C->getInteropVar());
if (ER.isInvalid())
return nullptr;
return getDerived().RebuildOMPUseClause(ER.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getVarLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDestroyClause(OMPDestroyClause *C) {
ExprResult ER;
if (Expr *IV = C->getInteropVar()) {
ER = getDerived().TransformExpr(IV);
if (ER.isInvalid())
return nullptr;
}
return getDerived().RebuildOMPDestroyClause(ER.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getVarLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNovariantsClause(OMPNovariantsClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPNovariantsClause(
Cond.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNocontextClause(OMPNocontextClause *C) {
ExprResult Cond = getDerived().TransformExpr(C->getCondition());
if (Cond.isInvalid())
return nullptr;
return getDerived().RebuildOMPNocontextClause(
Cond.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPFilterClause(OMPFilterClause *C) {
ExprResult ThreadID = getDerived().TransformExpr(C->getThreadID());
if (ThreadID.isInvalid())
return nullptr;
return getDerived().RebuildOMPFilterClause(ThreadID.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUnifiedAddressClause(
OMPUnifiedAddressClause *C) {
llvm_unreachable("unified_address clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUnifiedSharedMemoryClause(
OMPUnifiedSharedMemoryClause *C) {
llvm_unreachable(
"unified_shared_memory clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPReverseOffloadClause(
OMPReverseOffloadClause *C) {
llvm_unreachable("reverse_offload clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPDynamicAllocatorsClause(
OMPDynamicAllocatorsClause *C) {
llvm_unreachable(
"dynamic_allocators clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPAtomicDefaultMemOrderClause(
OMPAtomicDefaultMemOrderClause *C) {
llvm_unreachable(
"atomic_default_mem_order clause cannot appear in dependent context");
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPrivateClause(OMPPrivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPPrivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFirstprivateClause(
OMPFirstprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPFirstprivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPLastprivateClause(OMPLastprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPLastprivateClause(
Vars, C->getKind(), C->getKindLoc(), C->getColonLoc(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPSharedClause(OMPSharedClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPSharedClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPReductionClause(OMPReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
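  // For example, a 'reduction(merge : x)' clause may name a reduction 'merge'
  // introduced by '#pragma omp declare reduction'; every candidate is kept in
  // an unresolved set so the rebuild can pick the right one per instantiation.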
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
      UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
          SemaRef.Context, /*NamingClass=*/nullptr,
          ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
          /*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPReductionClause(
Vars, C->getModifier(), C->getBeginLoc(), C->getLParenLoc(),
C->getModifierLoc(), C->getColonLoc(), C->getEndLoc(),
ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPTaskReductionClause(
OMPTaskReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
SemaRef.Context, /*NamingClass=*/nullptr,
ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
/*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPTaskReductionClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPInReductionClause(OMPInReductionClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
CXXScopeSpec ReductionIdScopeSpec;
ReductionIdScopeSpec.Adopt(C->getQualifierLoc());
DeclarationNameInfo NameInfo = C->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return nullptr;
}
  // Build a list of all user-defined reduction (UDR) decls with the same
  // name, grouped by scope; a scope boundary is marked by a duplicate of the
  // preceding decl.
llvm::SmallVector<Expr *, 16> UnresolvedReductions;
for (auto *E : C->reduction_ops()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedReductions.push_back(UnresolvedLookupExpr::Create(
SemaRef.Context, /*NamingClass=*/nullptr,
ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
/*ADL=*/true, ULE->isOverloaded(), Decls.begin(), Decls.end()));
} else
UnresolvedReductions.push_back(nullptr);
}
return getDerived().RebuildOMPInReductionClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPLinearClause(OMPLinearClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
ExprResult Step = getDerived().TransformExpr(C->getStep());
if (Step.isInvalid())
return nullptr;
return getDerived().RebuildOMPLinearClause(
Vars, Step.get(), C->getBeginLoc(), C->getLParenLoc(), C->getModifier(),
C->getModifierLoc(), C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAlignedClause(OMPAlignedClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
ExprResult Alignment = getDerived().TransformExpr(C->getAlignment());
if (Alignment.isInvalid())
return nullptr;
return getDerived().RebuildOMPAlignedClause(
Vars, Alignment.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getColonLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCopyinClause(OMPCopyinClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPCopyinClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPCopyprivateClause(OMPCopyprivateClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPCopyprivateClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFlushClause(OMPFlushClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPFlushClause(Vars, C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDepobjClause(OMPDepobjClause *C) {
ExprResult E = getDerived().TransformExpr(C->getDepobj());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDepobjClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDependClause(OMPDependClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Expr *DepModifier = C->getModifier();
if (DepModifier) {
ExprResult DepModRes = getDerived().TransformExpr(DepModifier);
if (DepModRes.isInvalid())
return nullptr;
DepModifier = DepModRes.get();
}
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPDependClause(
DepModifier, C->getDependencyKind(), C->getDependencyLoc(),
C->getColonLoc(), Vars, C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDeviceClause(OMPDeviceClause *C) {
ExprResult E = getDerived().TransformExpr(C->getDevice());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDeviceClause(
C->getModifier(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getModifierLoc(), C->getEndLoc());
}
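// Shared helper for the map/to/from clauses below: transforms the variable
// list together with the optional 'mapper(...)' scope specifier and
// identifier, collecting any still-unresolved mapper lookups.  Returns true
// on error.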
template <typename Derived, class T>
bool transformOMPMappableExprListClause(
TreeTransform<Derived> &TT, OMPMappableExprListClause<T> *C,
llvm::SmallVectorImpl<Expr *> &Vars, CXXScopeSpec &MapperIdScopeSpec,
DeclarationNameInfo &MapperIdInfo,
llvm::SmallVectorImpl<Expr *> &UnresolvedMappers) {
// Transform expressions in the list.
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = TT.getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return true;
Vars.push_back(EVar.get());
}
// Transform mapper scope specifier and identifier.
NestedNameSpecifierLoc QualifierLoc;
if (C->getMapperQualifierLoc()) {
QualifierLoc = TT.getDerived().TransformNestedNameSpecifierLoc(
C->getMapperQualifierLoc());
if (!QualifierLoc)
return true;
}
MapperIdScopeSpec.Adopt(QualifierLoc);
MapperIdInfo = C->getMapperIdInfo();
if (MapperIdInfo.getName()) {
MapperIdInfo = TT.getDerived().TransformDeclarationNameInfo(MapperIdInfo);
if (!MapperIdInfo.getName())
return true;
}
  // Build a list of all candidate OMPDeclareMapperDecls, as provided by the
  // earlier user-defined mapper lookup in the dependent context.
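  // For example, 'map(mapper(id), tofrom: s)' names a mapper 'id' declared
  // with '#pragma omp declare mapper'; in a dependent context the candidate
  // set stays unresolved until instantiation picks the right declaration.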
for (auto *E : C->mapperlists()) {
// Transform all the decls.
if (E) {
auto *ULE = cast<UnresolvedLookupExpr>(E);
UnresolvedSet<8> Decls;
for (auto *D : ULE->decls()) {
NamedDecl *InstD =
cast<NamedDecl>(TT.getDerived().TransformDecl(E->getExprLoc(), D));
Decls.addDecl(InstD, InstD->getAccess());
}
UnresolvedMappers.push_back(UnresolvedLookupExpr::Create(
TT.getSema().Context, /*NamingClass=*/nullptr,
MapperIdScopeSpec.getWithLocInContext(TT.getSema().Context),
MapperIdInfo, /*ADL=*/true, ULE->isOverloaded(), Decls.begin(),
Decls.end()));
} else {
UnresolvedMappers.push_back(nullptr);
}
}
return false;
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPMapClause(OMPMapClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPMapClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPMapClause(
C->getMapTypeModifiers(), C->getMapTypeModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getMapType(), C->isImplicitMapType(), C->getMapLoc(),
C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAllocateClause(OMPAllocateClause *C) {
Expr *Allocator = C->getAllocator();
if (Allocator) {
ExprResult AllocatorRes = getDerived().TransformExpr(Allocator);
if (AllocatorRes.isInvalid())
return nullptr;
Allocator = AllocatorRes.get();
}
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPAllocateClause(
Allocator, Vars, C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumTeamsClause(OMPNumTeamsClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumTeams());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumTeamsClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPThreadLimitClause(OMPThreadLimitClause *C) {
ExprResult E = getDerived().TransformExpr(C->getThreadLimit());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPThreadLimitClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPPriorityClause(OMPPriorityClause *C) {
ExprResult E = getDerived().TransformExpr(C->getPriority());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPPriorityClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPGrainsizeClause(OMPGrainsizeClause *C) {
ExprResult E = getDerived().TransformExpr(C->getGrainsize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPGrainsizeClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNumTasksClause(OMPNumTasksClause *C) {
ExprResult E = getDerived().TransformExpr(C->getNumTasks());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPNumTasksClause(
E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPHintClause(OMPHintClause *C) {
ExprResult E = getDerived().TransformExpr(C->getHint());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPHintClause(E.get(), C->getBeginLoc(),
C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPDistScheduleClause(
OMPDistScheduleClause *C) {
ExprResult E = getDerived().TransformExpr(C->getChunkSize());
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPDistScheduleClause(
C->getDistScheduleKind(), E.get(), C->getBeginLoc(), C->getLParenLoc(),
C->getDistScheduleKindLoc(), C->getCommaLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPDefaultmapClause(OMPDefaultmapClause *C) {
  // Rebuild the defaultmap clause, since the defaultmap(none:variable-category)
  // check must be performed again after template instantiation.
return getDerived().RebuildOMPDefaultmapClause(C->getDefaultmapModifier(),
C->getDefaultmapKind(),
C->getBeginLoc(),
C->getLParenLoc(),
C->getDefaultmapModifierLoc(),
C->getDefaultmapKindLoc(),
C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPToClause(OMPToClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPToClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPToClause(
C->getMotionModifiers(), C->getMotionModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPFromClause(OMPFromClause *C) {
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
llvm::SmallVector<Expr *, 16> Vars;
CXXScopeSpec MapperIdScopeSpec;
DeclarationNameInfo MapperIdInfo;
llvm::SmallVector<Expr *, 16> UnresolvedMappers;
if (transformOMPMappableExprListClause<Derived, OMPFromClause>(
*this, C, Vars, MapperIdScopeSpec, MapperIdInfo, UnresolvedMappers))
return nullptr;
return getDerived().RebuildOMPFromClause(
C->getMotionModifiers(), C->getMotionModifiersLoc(), MapperIdScopeSpec,
MapperIdInfo, C->getColonLoc(), Vars, Locs, UnresolvedMappers);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseDevicePtrClause(
OMPUseDevicePtrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPUseDevicePtrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUseDeviceAddrClause(
OMPUseDeviceAddrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPUseDeviceAddrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
OMPVarListLocTy Locs(C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
return getDerived().RebuildOMPIsDevicePtrClause(Vars, Locs);
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPNontemporalClause(OMPNontemporalClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPNontemporalClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPInclusiveClause(OMPInclusiveClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPInclusiveClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPExclusiveClause(OMPExclusiveClause *C) {
llvm::SmallVector<Expr *, 16> Vars;
Vars.reserve(C->varlist_size());
for (auto *VE : C->varlists()) {
ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
if (EVar.isInvalid())
return nullptr;
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPExclusiveClause(
Vars, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUsesAllocatorsClause(
OMPUsesAllocatorsClause *C) {
SmallVector<Sema::UsesAllocatorsData, 16> Data;
Data.reserve(C->getNumberOfAllocators());
for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
ExprResult Allocator = getDerived().TransformExpr(D.Allocator);
if (Allocator.isInvalid())
continue;
ExprResult AllocatorTraits;
if (Expr *AT = D.AllocatorTraits) {
AllocatorTraits = getDerived().TransformExpr(AT);
if (AllocatorTraits.isInvalid())
continue;
}
Sema::UsesAllocatorsData &NewD = Data.emplace_back();
NewD.Allocator = Allocator.get();
NewD.AllocatorTraits = AllocatorTraits.get();
NewD.LParenLoc = D.LParenLoc;
NewD.RParenLoc = D.RParenLoc;
}
return getDerived().RebuildOMPUsesAllocatorsClause(
Data, C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
OMPClause *
TreeTransform<Derived>::TransformOMPAffinityClause(OMPAffinityClause *C) {
SmallVector<Expr *, 4> Locators;
Locators.reserve(C->varlist_size());
ExprResult ModifierRes;
if (Expr *Modifier = C->getModifier()) {
ModifierRes = getDerived().TransformExpr(Modifier);
if (ModifierRes.isInvalid())
return nullptr;
}
for (Expr *E : C->varlists()) {
ExprResult Locator = getDerived().TransformExpr(E);
if (Locator.isInvalid())
continue;
Locators.push_back(Locator.get());
}
return getDerived().RebuildOMPAffinityClause(
C->getBeginLoc(), C->getLParenLoc(), C->getColonLoc(), C->getEndLoc(),
ModifierRes.get(), Locators);
}
template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPOrderClause(OMPOrderClause *C) {
return getDerived().RebuildOMPOrderClause(C->getKind(), C->getKindKwLoc(),
C->getBeginLoc(), C->getLParenLoc(),
C->getEndLoc());
}
//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
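// Most expression transforms below share the same shape: transform the
// operands, and if nothing changed (and the derived transform does not force
// AlwaysRebuild()) return the original expression; otherwise call the
// corresponding Rebuild*() hook so the expression is re-analyzed with the new
// operands.  Literals have no operands and are simply returned as-is.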
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConstantExpr(ConstantExpr *E) {
return TransformExpr(E->getSubExpr());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformSYCLUniqueStableNameExpr(
SYCLUniqueStableNameExpr *E) {
if (!E->isTypeDependent())
return E;
TypeSourceInfo *NewT = getDerived().TransformType(E->getTypeSourceInfo());
if (!NewT)
return ExprError();
if (!getDerived().AlwaysRebuild() && E->getTypeSourceInfo() == NewT)
return E;
return getDerived().RebuildSYCLUniqueStableNameExpr(
E->getLocation(), E->getLParenLocation(), E->getRParenLocation(), NewT);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPredefinedExpr(PredefinedExpr *E) {
if (!E->isTypeDependent())
return E;
return getDerived().RebuildPredefinedExpr(E->getLocation(),
E->getIdentKind());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDeclRefExpr(DeclRefExpr *E) {
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifierLoc()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
ValueDecl *ND
= cast_or_null<ValueDecl>(getDerived().TransformDecl(E->getLocation(),
E->getDecl()));
if (!ND)
return ExprError();
NamedDecl *Found = ND;
if (E->getFoundDecl() != E->getDecl()) {
Found = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getLocation(), E->getFoundDecl()));
if (!Found)
return ExprError();
}
DeclarationNameInfo NameInfo = E->getNameInfo();
if (NameInfo.getName()) {
NameInfo = getDerived().TransformDeclarationNameInfo(NameInfo);
if (!NameInfo.getName())
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
QualifierLoc == E->getQualifierLoc() &&
ND == E->getDecl() &&
Found == E->getFoundDecl() &&
NameInfo.getName() == E->getDecl()->getDeclName() &&
!E->hasExplicitTemplateArgs()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
SemaRef.MarkDeclRefReferenced(E);
return E;
}
TemplateArgumentListInfo TransArgs, *TemplateArgs = nullptr;
if (E->hasExplicitTemplateArgs()) {
TemplateArgs = &TransArgs;
TransArgs.setLAngleLoc(E->getLAngleLoc());
TransArgs.setRAngleLoc(E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
}
return getDerived().RebuildDeclRefExpr(QualifierLoc, ND, NameInfo,
Found, TemplateArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformIntegerLiteral(IntegerLiteral *E) {
return E;
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformFixedPointLiteral(
FixedPointLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformFloatingLiteral(FloatingLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImaginaryLiteral(ImaginaryLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformStringLiteral(StringLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCharacterLiteral(CharacterLiteral *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUserDefinedLiteral(UserDefinedLiteral *E) {
if (FunctionDecl *FD = E->getDirectCallee())
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), FD);
return SemaRef.MaybeBindToTemporary(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformGenericSelectionExpr(GenericSelectionExpr *E) {
ExprResult ControllingExpr =
getDerived().TransformExpr(E->getControllingExpr());
if (ControllingExpr.isInvalid())
return ExprError();
SmallVector<Expr *, 4> AssocExprs;
SmallVector<TypeSourceInfo *, 4> AssocTypes;
for (const GenericSelectionExpr::Association Assoc : E->associations()) {
TypeSourceInfo *TSI = Assoc.getTypeSourceInfo();
if (TSI) {
TypeSourceInfo *AssocType = getDerived().TransformType(TSI);
if (!AssocType)
return ExprError();
AssocTypes.push_back(AssocType);
} else {
AssocTypes.push_back(nullptr);
}
ExprResult AssocExpr =
getDerived().TransformExpr(Assoc.getAssociationExpr());
if (AssocExpr.isInvalid())
return ExprError();
AssocExprs.push_back(AssocExpr.get());
}
return getDerived().RebuildGenericSelectionExpr(E->getGenericLoc(),
E->getDefaultLoc(),
E->getRParenLoc(),
ControllingExpr.get(),
AssocTypes,
AssocExprs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformParenExpr(ParenExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildParenExpr(SubExpr.get(), E->getLParen(),
E->getRParen());
}
/// The operand of a unary address-of operator has special rules: it's
/// allowed to refer to a non-static member of a class even if there's no 'this'
/// object available.
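/// For example, '&T::f' may name a non-static member function in order to
/// form a pointer to member, so the operand is transformed without requiring
/// an implicit 'this'.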
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAddressOfOperand(Expr *E) {
if (DependentScopeDeclRefExpr *DRE = dyn_cast<DependentScopeDeclRefExpr>(E))
return getDerived().TransformDependentScopeDeclRefExpr(DRE, true, nullptr);
else
return getDerived().TransformExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnaryOperator(UnaryOperator *E) {
ExprResult SubExpr;
if (E->getOpcode() == UO_AddrOf)
SubExpr = TransformAddressOfOperand(E->getSubExpr());
else
SubExpr = TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildUnaryOperator(E->getOperatorLoc(),
E->getOpcode(),
SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformOffsetOfExpr(OffsetOfExpr *E) {
// Transform the type.
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeSourceInfo());
if (!Type)
return ExprError();
// Transform all of the components into components similar to what the
// parser uses.
// FIXME: It would be slightly more efficient in the non-dependent case to
// just map FieldDecls, rather than requiring the rebuilder to look for
// the fields again. However, __builtin_offsetof is rare enough in
// template code that we don't care.
bool ExprChanged = false;
typedef Sema::OffsetOfComponent Component;
SmallVector<Component, 4> Components;
for (unsigned I = 0, N = E->getNumComponents(); I != N; ++I) {
const OffsetOfNode &ON = E->getComponent(I);
Component Comp;
Comp.isBrackets = true;
Comp.LocStart = ON.getSourceRange().getBegin();
Comp.LocEnd = ON.getSourceRange().getEnd();
switch (ON.getKind()) {
case OffsetOfNode::Array: {
Expr *FromIndex = E->getIndexExpr(ON.getArrayExprIndex());
ExprResult Index = getDerived().TransformExpr(FromIndex);
if (Index.isInvalid())
return ExprError();
ExprChanged = ExprChanged || Index.get() != FromIndex;
Comp.isBrackets = true;
Comp.U.E = Index.get();
break;
}
case OffsetOfNode::Field:
case OffsetOfNode::Identifier:
Comp.isBrackets = false;
Comp.U.IdentInfo = ON.getFieldName();
if (!Comp.U.IdentInfo)
continue;
break;
case OffsetOfNode::Base:
// Will be recomputed during the rebuild.
continue;
}
Components.push_back(Comp);
}
// If nothing changed, retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeSourceInfo() &&
!ExprChanged)
return E;
// Build a new offsetof expression.
return getDerived().RebuildOffsetOfExpr(E->getOperatorLoc(), Type,
Components, E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformOpaqueValueExpr(OpaqueValueExpr *E) {
assert((!E->getSourceExpr() || getDerived().AlreadyTransformed(E->getType())) &&
"opaque value expression requires transformation");
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformTypoExpr(TypoExpr *E) {
return E;
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformRecoveryExpr(RecoveryExpr *E) {
llvm::SmallVector<Expr *, 8> Children;
bool Changed = false;
for (Expr *C : E->subExpressions()) {
ExprResult NewC = getDerived().TransformExpr(C);
if (NewC.isInvalid())
return ExprError();
Children.push_back(NewC.get());
Changed |= NewC.get() != C;
}
if (!getDerived().AlwaysRebuild() && !Changed)
return E;
return getDerived().RebuildRecoveryExpr(E->getBeginLoc(), E->getEndLoc(),
Children, E->getType());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPseudoObjectExpr(PseudoObjectExpr *E) {
// Rebuild the syntactic form. The original syntactic form has
// opaque-value expressions in it, so strip those away and rebuild
// the result. This is a really awful way of doing this, but the
// better solution (rebuilding the semantic expressions and
// rebinding OVEs as necessary) doesn't work; we'd need
// TreeTransform to not strip away implicit conversions.
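  // (Pseudo-object expressions cover constructs such as Objective-C property
  // and subscript accesses and MS __declspec(property) references, where the
  // syntactic form 'obj.prop = x' is backed by separately built getter/setter
  // calls.)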
Expr *newSyntacticForm = SemaRef.recreateSyntacticForm(E);
ExprResult result = getDerived().TransformExpr(newSyntacticForm);
if (result.isInvalid()) return ExprError();
// If that gives us a pseudo-object result back, the pseudo-object
// expression must have been an lvalue-to-rvalue conversion which we
// should reapply.
if (result.get()->hasPlaceholderType(BuiltinType::PseudoObject))
result = SemaRef.checkPseudoObjectRValue(result.get());
return result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnaryExprOrTypeTraitExpr(
UnaryExprOrTypeTraitExpr *E) {
if (E->isArgumentType()) {
TypeSourceInfo *OldT = E->getArgumentTypeInfo();
TypeSourceInfo *NewT = getDerived().TransformType(OldT);
if (!NewT)
return ExprError();
if (!getDerived().AlwaysRebuild() && OldT == NewT)
return E;
return getDerived().RebuildUnaryExprOrTypeTrait(NewT, E->getOperatorLoc(),
E->getKind(),
E->getSourceRange());
}
// C++0x [expr.sizeof]p1:
// The operand is either an expression, which is an unevaluated operand
// [...]
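  // For example, in 'sizeof(g())' the call to g() is never evaluated; the
  // unevaluated context keeps the transformed operand from producing odr-uses
  // or side effects.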
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated,
Sema::ReuseLambdaContextDecl);
// Try to recover if we have something like sizeof(T::X) where X is a type.
// Notably, there must be *exactly* one set of parens if X is a type.
TypeSourceInfo *RecoveryTSI = nullptr;
ExprResult SubExpr;
auto *PE = dyn_cast<ParenExpr>(E->getArgumentExpr());
if (auto *DRE =
PE ? dyn_cast<DependentScopeDeclRefExpr>(PE->getSubExpr()) : nullptr)
SubExpr = getDerived().TransformParenDependentScopeDeclRefExpr(
PE, DRE, false, &RecoveryTSI);
else
SubExpr = getDerived().TransformExpr(E->getArgumentExpr());
if (RecoveryTSI) {
return getDerived().RebuildUnaryExprOrTypeTrait(
RecoveryTSI, E->getOperatorLoc(), E->getKind(), E->getSourceRange());
} else if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getArgumentExpr())
return E;
return getDerived().RebuildUnaryExprOrTypeTrait(SubExpr.get(),
E->getOperatorLoc(),
E->getKind(),
E->getSourceRange());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArraySubscriptExpr(ArraySubscriptExpr *E) {
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildArraySubscriptExpr(
LHS.get(),
/*FIXME:*/ E->getLHS()->getBeginLoc(), RHS.get(), E->getRBracketLoc());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
ExprResult RowIdx = getDerived().TransformExpr(E->getRowIdx());
if (RowIdx.isInvalid())
return ExprError();
ExprResult ColumnIdx = getDerived().TransformExpr(E->getColumnIdx());
if (ColumnIdx.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() &&
RowIdx.get() == E->getRowIdx() && ColumnIdx.get() == E->getColumnIdx())
return E;
return getDerived().RebuildMatrixSubscriptExpr(
Base.get(), RowIdx.get(), ColumnIdx.get(), E->getRBracketLoc());
}
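// OpenMP array sections such as 'a[lb : len]' or 'a[lb : len : stride]' carry
// an optional lower bound, length and (since OpenMP 5.0) stride; each part is
// transformed independently below.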
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
ExprResult LowerBound;
if (E->getLowerBound()) {
LowerBound = getDerived().TransformExpr(E->getLowerBound());
if (LowerBound.isInvalid())
return ExprError();
}
ExprResult Length;
if (E->getLength()) {
Length = getDerived().TransformExpr(E->getLength());
if (Length.isInvalid())
return ExprError();
}
ExprResult Stride;
if (Expr *Str = E->getStride()) {
Stride = getDerived().TransformExpr(Str);
if (Stride.isInvalid())
return ExprError();
}
if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() &&
LowerBound.get() == E->getLowerBound() && Length.get() == E->getLength())
return E;
return getDerived().RebuildOMPArraySectionExpr(
Base.get(), E->getBase()->getEndLoc(), LowerBound.get(),
E->getColonLocFirst(), E->getColonLocSecond(), Length.get(), Stride.get(),
E->getRBracketLoc());
}
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPArrayShapingExpr(OMPArrayShapingExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
SmallVector<Expr *, 4> Dims;
bool ErrorFound = false;
for (Expr *Dim : E->getDimensions()) {
ExprResult DimRes = getDerived().TransformExpr(Dim);
if (DimRes.isInvalid()) {
ErrorFound = true;
continue;
}
Dims.push_back(DimRes.get());
}
if (ErrorFound)
return ExprError();
return getDerived().RebuildOMPArrayShapingExpr(Base.get(), E->getLParenLoc(),
E->getRParenLoc(), Dims,
E->getBracketsRanges());
}
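// OpenMP 5.0 iterator modifiers, e.g. 'depend(iterator(i = 0:n), in: a[i])',
// declare their own iterator variables; each variable's type, range bounds
// and step may be template-dependent and are transformed one by one below.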
template <typename Derived>
ExprResult
TreeTransform<Derived>::TransformOMPIteratorExpr(OMPIteratorExpr *E) {
unsigned NumIterators = E->numOfIterators();
SmallVector<Sema::OMPIteratorData, 4> Data(NumIterators);
bool ErrorFound = false;
bool NeedToRebuild = getDerived().AlwaysRebuild();
for (unsigned I = 0; I < NumIterators; ++I) {
auto *D = cast<VarDecl>(E->getIteratorDecl(I));
Data[I].DeclIdent = D->getIdentifier();
Data[I].DeclIdentLoc = D->getLocation();
if (D->getLocation() == D->getBeginLoc()) {
assert(SemaRef.Context.hasSameType(D->getType(), SemaRef.Context.IntTy) &&
"Implicit type must be int.");
} else {
TypeSourceInfo *TSI = getDerived().TransformType(D->getTypeSourceInfo());
QualType DeclTy = getDerived().TransformType(D->getType());
Data[I].Type = SemaRef.CreateParsedType(DeclTy, TSI);
}
OMPIteratorExpr::IteratorRange Range = E->getIteratorRange(I);
ExprResult Begin = getDerived().TransformExpr(Range.Begin);
ExprResult End = getDerived().TransformExpr(Range.End);
ExprResult Step = getDerived().TransformExpr(Range.Step);
ErrorFound = ErrorFound ||
!(!D->getTypeSourceInfo() || (Data[I].Type.getAsOpaquePtr() &&
!Data[I].Type.get().isNull())) ||
Begin.isInvalid() || End.isInvalid() || Step.isInvalid();
if (ErrorFound)
continue;
Data[I].Range.Begin = Begin.get();
Data[I].Range.End = End.get();
Data[I].Range.Step = Step.get();
Data[I].AssignLoc = E->getAssignLoc(I);
Data[I].ColonLoc = E->getColonLoc(I);
Data[I].SecColonLoc = E->getSecondColonLoc(I);
NeedToRebuild =
NeedToRebuild ||
(D->getTypeSourceInfo() && Data[I].Type.get().getTypePtrOrNull() !=
D->getType().getTypePtrOrNull()) ||
Range.Begin != Data[I].Range.Begin || Range.End != Data[I].Range.End ||
Range.Step != Data[I].Range.Step;
}
if (ErrorFound)
return ExprError();
if (!NeedToRebuild)
return E;
ExprResult Res = getDerived().RebuildOMPIteratorExpr(
E->getIteratorKwLoc(), E->getLParenLoc(), E->getRParenLoc(), Data);
if (!Res.isUsable())
return Res;
auto *IE = cast<OMPIteratorExpr>(Res.get());
for (unsigned I = 0; I < NumIterators; ++I)
getDerived().transformedLocalDecl(E->getIteratorDecl(I),
IE->getIteratorDecl(I));
return Res;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCallExpr(CallExpr *E) {
// Transform the callee.
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
!ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// FIXME: Wrong source location information for the '('.
SourceLocation FakeLParenLoc
= ((Expr *)Callee.get())->getSourceRange().getBegin();
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
if (E->hasStoredFPFeatures()) {
FPOptionsOverride NewOverrides = E->getFPFeatures();
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
}
return getDerived().RebuildCallExpr(Callee.get(), FakeLParenLoc,
Args,
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMemberExpr(MemberExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
NestedNameSpecifierLoc QualifierLoc;
if (E->hasQualifier()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
ValueDecl *Member
= cast_or_null<ValueDecl>(getDerived().TransformDecl(E->getMemberLoc(),
E->getMemberDecl()));
if (!Member)
return ExprError();
NamedDecl *FoundDecl = E->getFoundDecl();
if (FoundDecl == E->getMemberDecl()) {
FoundDecl = Member;
} else {
FoundDecl = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getMemberLoc(), FoundDecl));
if (!FoundDecl)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase() &&
QualifierLoc == E->getQualifierLoc() &&
Member == E->getMemberDecl() &&
FoundDecl == E->getFoundDecl() &&
!E->hasExplicitTemplateArgs()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
SemaRef.MarkMemberReferenced(E);
return E;
}
TemplateArgumentListInfo TransArgs;
if (E->hasExplicitTemplateArgs()) {
TransArgs.setLAngleLoc(E->getLAngleLoc());
TransArgs.setRAngleLoc(E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
}
// FIXME: Bogus source location for the operator
SourceLocation FakeOperatorLoc =
SemaRef.getLocForEndOfToken(E->getBase()->getSourceRange().getEnd());
// FIXME: to do this check properly, we will need to preserve the
// first-qualifier-in-scope here, just in case we had a dependent
// base (and therefore couldn't do the check) and a
// nested-name-qualifier (and therefore could do the lookup).
NamedDecl *FirstQualifierInScope = nullptr;
DeclarationNameInfo MemberNameInfo = E->getMemberNameInfo();
if (MemberNameInfo.getName()) {
MemberNameInfo = getDerived().TransformDeclarationNameInfo(MemberNameInfo);
if (!MemberNameInfo.getName())
return ExprError();
}
return getDerived().RebuildMemberExpr(Base.get(), FakeOperatorLoc,
E->isArrow(),
QualifierLoc,
TemplateKWLoc,
MemberNameInfo,
Member,
FoundDecl,
(E->hasExplicitTemplateArgs()
? &TransArgs : nullptr),
FirstQualifierInScope);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBinaryOperator(BinaryOperator *E) {
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
if (E->isCompoundAssignmentOp())
// FPFeatures has already been established from trailing storage
return getDerived().RebuildBinaryOperator(
E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get());
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures(getSema().getLangOpts()));
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().RebuildBinaryOperator(E->getOperatorLoc(), E->getOpcode(),
LHS.get(), RHS.get());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformCXXRewrittenBinaryOperator(
CXXRewrittenBinaryOperator *E) {
CXXRewrittenBinaryOperator::DecomposedForm Decomp = E->getDecomposedForm();
ExprResult LHS = getDerived().TransformExpr(const_cast<Expr*>(Decomp.LHS));
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(const_cast<Expr*>(Decomp.RHS));
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
LHS.get() == Decomp.LHS &&
RHS.get() == Decomp.RHS)
return E;
// Extract the already-resolved callee declarations so that we can restrict
// ourselves to using them as the unqualified lookup results when rebuilding.
UnresolvedSet<2> UnqualLookups;
Expr *PossibleBinOps[] = {E->getSemanticForm(),
const_cast<Expr *>(Decomp.InnerBinOp)};
for (Expr *PossibleBinOp : PossibleBinOps) {
auto *Op = dyn_cast<CXXOperatorCallExpr>(PossibleBinOp->IgnoreImplicit());
if (!Op)
continue;
auto *Callee = dyn_cast<DeclRefExpr>(Op->getCallee()->IgnoreImplicit());
if (!Callee || isa<CXXMethodDecl>(Callee->getDecl()))
continue;
// Transform the callee in case we built a call to a local extern
// declaration.
NamedDecl *Found = cast_or_null<NamedDecl>(getDerived().TransformDecl(
E->getOperatorLoc(), Callee->getFoundDecl()));
if (!Found)
return ExprError();
UnqualLookups.addDecl(Found);
}
return getDerived().RebuildCXXRewrittenBinaryOperator(
E->getOperatorLoc(), Decomp.Opcode, UnqualLookups, LHS.get(), RHS.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCompoundAssignOperator(
CompoundAssignOperator *E) {
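// Compound assignments keep their FP pragma state in trailing storage;
// install it here before delegating to the generic binary-operator
// transform, which deliberately skips this step for compound assignments.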
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures(getSema().getLangOpts()));
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().TransformBinaryOperator(E);
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformBinaryConditionalOperator(BinaryConditionalOperator *e) {
// Just rebuild the common and RHS expressions and see whether we
// get any changes.
ExprResult commonExpr = getDerived().TransformExpr(e->getCommon());
if (commonExpr.isInvalid())
return ExprError();
ExprResult rhs = getDerived().TransformExpr(e->getFalseExpr());
if (rhs.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
commonExpr.get() == e->getCommon() &&
rhs.get() == e->getFalseExpr())
return e;
return getDerived().RebuildConditionalOperator(commonExpr.get(),
e->getQuestionLoc(),
nullptr,
e->getColonLoc(),
rhs.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConditionalOperator(ConditionalOperator *E) {
ExprResult Cond = getDerived().TransformExpr(E->getCond());
if (Cond.isInvalid())
return ExprError();
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == E->getCond() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildConditionalOperator(Cond.get(),
E->getQuestionLoc(),
LHS.get(),
E->getColonLoc(),
RHS.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImplicitCastExpr(ImplicitCastExpr *E) {
// Implicit casts are eliminated during transformation, since they
// will be recomputed by semantic analysis after transformation.
return getDerived().TransformExpr(E->getSubExprAsWritten());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCStyleCastExpr(CStyleCastExpr *E) {
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCStyleCastExpr(E->getLParenLoc(),
Type,
E->getRParenLoc(),
SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCompoundLiteralExpr(CompoundLiteralExpr *E) {
TypeSourceInfo *OldT = E->getTypeSourceInfo();
TypeSourceInfo *NewT = getDerived().TransformType(OldT);
if (!NewT)
return ExprError();
ExprResult Init = getDerived().TransformExpr(E->getInitializer());
if (Init.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
OldT == NewT &&
Init.get() == E->getInitializer())
return SemaRef.MaybeBindToTemporary(E);
// Note: the expression type doesn't necessarily match the
// type-as-written, but that's okay, because it should always be
// derivable from the initializer.
return getDerived().RebuildCompoundLiteralExpr(
E->getLParenLoc(), NewT,
/*FIXME:*/ E->getInitializer()->getEndLoc(), Init.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExtVectorElementExpr(ExtVectorElementExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
// FIXME: Bad source location
SourceLocation FakeOperatorLoc =
SemaRef.getLocForEndOfToken(E->getBase()->getEndLoc());
return getDerived().RebuildExtVectorElementExpr(Base.get(), FakeOperatorLoc,
E->getAccessorLoc(),
E->getAccessor());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformInitListExpr(InitListExpr *E) {
if (InitListExpr *Syntactic = E->getSyntacticForm())
E = Syntactic;
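// Only the syntactic form is transformed here; Sema recomputes the semantic
// form when the rebuilt initializer list is checked.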
bool InitChanged = false;
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList);
SmallVector<Expr*, 4> Inits;
if (getDerived().TransformExprs(E->getInits(), E->getNumInits(), false,
Inits, &InitChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() && !InitChanged) {
// FIXME: Attempt to reuse the existing syntactic form of the InitListExpr
// in some cases. We can't reuse it in general, because the syntactic and
// semantic forms are linked, and we can't know that semantic form will
// match even if the syntactic form does.
}
return getDerived().RebuildInitList(E->getLBraceLoc(), Inits,
E->getRBraceLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDesignatedInitExpr(DesignatedInitExpr *E) {
Designation Desig;
// transform the initializer value
ExprResult Init = getDerived().TransformExpr(E->getInit());
if (Init.isInvalid())
return ExprError();
// transform the designators.
SmallVector<Expr*, 4> ArrayExprs;
bool ExprChanged = false;
for (const DesignatedInitExpr::Designator &D : E->designators()) {
if (D.isFieldDesignator()) {
Desig.AddDesignator(Designator::getField(D.getFieldName(),
D.getDotLoc(),
D.getFieldLoc()));
if (D.getField()) {
FieldDecl *Field = cast_or_null<FieldDecl>(
getDerived().TransformDecl(D.getFieldLoc(), D.getField()));
if (Field != D.getField())
// Rebuild the expression when the transformed FieldDecl is
// different to the already assigned FieldDecl.
ExprChanged = true;
} else {
// Ensure that the designator expression is rebuilt when there isn't
// a resolved FieldDecl in the designator as we don't want to assign
// a FieldDecl to a pattern designator that will be instantiated again.
ExprChanged = true;
}
continue;
}
if (D.isArrayDesignator()) {
ExprResult Index = getDerived().TransformExpr(E->getArrayIndex(D));
if (Index.isInvalid())
return ExprError();
Desig.AddDesignator(
Designator::getArray(Index.get(), D.getLBracketLoc()));
ExprChanged = ExprChanged || Index.get() != E->getArrayIndex(D);
ArrayExprs.push_back(Index.get());
continue;
}
assert(D.isArrayRangeDesignator() && "New kind of designator?");
ExprResult Start
= getDerived().TransformExpr(E->getArrayRangeStart(D));
if (Start.isInvalid())
return ExprError();
ExprResult End = getDerived().TransformExpr(E->getArrayRangeEnd(D));
if (End.isInvalid())
return ExprError();
Desig.AddDesignator(Designator::getArrayRange(Start.get(),
End.get(),
D.getLBracketLoc(),
D.getEllipsisLoc()));
ExprChanged = ExprChanged || Start.get() != E->getArrayRangeStart(D) ||
End.get() != E->getArrayRangeEnd(D);
ArrayExprs.push_back(Start.get());
ArrayExprs.push_back(End.get());
}
if (!getDerived().AlwaysRebuild() &&
Init.get() == E->getInit() &&
!ExprChanged)
return E;
return getDerived().RebuildDesignatedInitExpr(Desig, ArrayExprs,
E->getEqualOrColonLoc(),
E->usesGNUSyntax(), Init.get());
}
// Because TransformInitListExpr() only operates on the syntactic form of an
// InitListExpr, a DesignatedInitUpdateExpr should never be encountered here.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformDesignatedInitUpdateExpr(
DesignatedInitUpdateExpr *E) {
llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
"initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformNoInitExpr(
NoInitExpr *E) {
llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayInitLoopExpr(ArrayInitLoopExpr *E) {
llvm_unreachable("Unexpected ArrayInitLoopExpr outside of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayInitIndexExpr(ArrayInitIndexExpr *E) {
llvm_unreachable("Unexpected ArrayInitIndexExpr outside of initializer");
return ExprError();
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformImplicitValueInitExpr(
ImplicitValueInitExpr *E) {
TemporaryBase Rebase(*this, E->getBeginLoc(), DeclarationName());
// FIXME: Will we ever have proper type location here? Will we actually
// need to transform the type?
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getType())
return E;
return getDerived().RebuildImplicitValueInitExpr(T);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformVAArgExpr(VAArgExpr *E) {
TypeSourceInfo *TInfo = getDerived().TransformType(E->getWrittenTypeInfo());
if (!TInfo)
return ExprError();
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getWrittenTypeInfo() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildVAArgExpr(E->getBuiltinLoc(), SubExpr.get(),
TInfo, E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformParenListExpr(ParenListExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 4> Inits;
if (TransformExprs(E->getExprs(), E->getNumExprs(), true, Inits,
&ArgumentChanged))
return ExprError();
return getDerived().RebuildParenListExpr(E->getLParenLoc(),
Inits,
E->getRParenLoc());
}
/// Transform an address-of-label expression.
///
/// By default, the transformation of an address-of-label expression always
/// rebuilds the expression, so that the label identifier can be resolved to
/// the corresponding label statement by semantic analysis.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAddrLabelExpr(AddrLabelExpr *E) {
Decl *LD = getDerived().TransformDecl(E->getLabel()->getLocation(),
E->getLabel());
if (!LD)
return ExprError();
return getDerived().RebuildAddrLabelExpr(E->getAmpAmpLoc(), E->getLabelLoc(),
cast<LabelDecl>(LD));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformStmtExpr(StmtExpr *E) {
SemaRef.ActOnStartStmtExpr();
StmtResult SubStmt
= getDerived().TransformCompoundStmt(E->getSubStmt(), true);
if (SubStmt.isInvalid()) {
SemaRef.ActOnStmtExprError();
return ExprError();
}
unsigned OldDepth = E->getTemplateDepth();
unsigned NewDepth = getDerived().TransformTemplateDepth(OldDepth);
if (!getDerived().AlwaysRebuild() && OldDepth == NewDepth &&
SubStmt.get() == E->getSubStmt()) {
// Calling this an 'error' is unintuitive, but it does the right thing.
SemaRef.ActOnStmtExprError();
return SemaRef.MaybeBindToTemporary(E);
}
return getDerived().RebuildStmtExpr(E->getLParenLoc(), SubStmt.get(),
E->getRParenLoc(), NewDepth);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformChooseExpr(ChooseExpr *E) {
ExprResult Cond = getDerived().TransformExpr(E->getCond());
if (Cond.isInvalid())
return ExprError();
ExprResult LHS = getDerived().TransformExpr(E->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = getDerived().TransformExpr(E->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Cond.get() == E->getCond() &&
LHS.get() == E->getLHS() &&
RHS.get() == E->getRHS())
return E;
return getDerived().RebuildChooseExpr(E->getBuiltinLoc(),
Cond.get(), LHS.get(), RHS.get(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformGNUNullExpr(GNUNullExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
switch (E->getOperator()) {
case OO_New:
case OO_Delete:
case OO_Array_New:
case OO_Array_Delete:
llvm_unreachable("new and delete operators cannot use CXXOperatorCallExpr");
case OO_Call: {
// This is a call to an object's operator().
assert(E->getNumArgs() >= 1 && "Object call is missing arguments");
// Transform the object itself.
ExprResult Object = getDerived().TransformExpr(E->getArg(0));
if (Object.isInvalid())
return ExprError();
// FIXME: Poor location information
SourceLocation FakeLParenLoc = SemaRef.getLocForEndOfToken(
static_cast<Expr *>(Object.get())->getEndLoc());
// Transform the call arguments.
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs() + 1, E->getNumArgs() - 1, true,
Args))
return ExprError();
return getDerived().RebuildCallExpr(Object.get(), FakeLParenLoc, Args,
E->getEndLoc());
}
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
case OO_##Name:
#define OVERLOADED_OPERATOR_MULTI(Name,Spelling,Unary,Binary,MemberOnly)
#include "clang/Basic/OperatorKinds.def"
case OO_Subscript:
// Handled below.
break;
case OO_Conditional:
llvm_unreachable("conditional operator is not actually overloadable");
case OO_None:
case NUM_OVERLOADED_OPERATORS:
llvm_unreachable("not an overloaded operator?");
}
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
ExprResult First;
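// Give the derived transform a chance to handle the operand of a unary '&'
// specially, so that forming a pointer to member still works after the
// transformation.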
if (E->getOperator() == OO_Amp)
First = getDerived().TransformAddressOfOperand(E->getArg(0));
else
First = getDerived().TransformExpr(E->getArg(0));
if (First.isInvalid())
return ExprError();
ExprResult Second;
if (E->getNumArgs() == 2) {
Second = getDerived().TransformExpr(E->getArg(1));
if (Second.isInvalid())
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
First.get() == E->getArg(0) &&
(E->getNumArgs() != 2 || Second.get() == E->getArg(1)))
return SemaRef.MaybeBindToTemporary(E);
Sema::FPFeaturesStateRAII FPFeaturesState(getSema());
FPOptionsOverride NewOverrides(E->getFPFeatures());
getSema().CurFPFeatures =
NewOverrides.applyOverrides(getSema().getLangOpts());
getSema().FpPragmaStack.CurrentValue = NewOverrides;
return getDerived().RebuildCXXOperatorCallExpr(E->getOperator(),
E->getOperatorLoc(),
Callee.get(),
First.get(),
Second.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXMemberCallExpr(CXXMemberCallExpr *E) {
return getDerived().TransformCallExpr(E);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformSourceLocExpr(SourceLocExpr *E) {
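// __builtin_FUNCTION() depends on the function it appears in, so the
// expression must be rebuilt whenever the enclosing context has changed.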
bool NeedRebuildFunc = E->getIdentKind() == SourceLocExpr::Function &&
getSema().CurContext != E->getParentContext();
if (!getDerived().AlwaysRebuild() && !NeedRebuildFunc)
return E;
return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getBeginLoc(),
E->getEndLoc(),
getSema().CurContext);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
// Transform the callee.
ExprResult Callee = getDerived().TransformExpr(E->getCallee());
if (Callee.isInvalid())
return ExprError();
// Transform exec config.
ExprResult EC = getDerived().TransformCallExpr(E->getConfig());
if (EC.isInvalid())
return ExprError();
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Callee.get() == E->getCallee() &&
!ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// FIXME: Wrong source location information for the '('.
SourceLocation FakeLParenLoc
= ((Expr *)Callee.get())->getSourceRange().getBegin();
return getDerived().RebuildCallExpr(Callee.get(), FakeLParenLoc,
Args,
E->getRParenLoc(), EC.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNamedCastExpr(CXXNamedCastExpr *E) {
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXNamedCastExpr(
E->getOperatorLoc(), E->getStmtClass(), E->getAngleBrackets().getBegin(),
Type, E->getAngleBrackets().getEnd(),
// FIXME: this should be the '(' location
E->getAngleBrackets().getEnd(), SubExpr.get(), E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBuiltinBitCastExpr(BuiltinBitCastExpr *BCE) {
TypeSourceInfo *TSI =
getDerived().TransformType(BCE->getTypeInfoAsWritten());
if (!TSI)
return ExprError();
ExprResult Sub = getDerived().TransformExpr(BCE->getSubExpr());
if (Sub.isInvalid())
return ExprError();
return getDerived().RebuildBuiltinBitCastExpr(BCE->getBeginLoc(), TSI,
Sub.get(), BCE->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXStaticCastExpr(CXXStaticCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDynamicCastExpr(CXXDynamicCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXReinterpretCastExpr(
CXXReinterpretCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXConstCastExpr(CXXConstCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXAddrspaceCastExpr(CXXAddrspaceCastExpr *E) {
return getDerived().TransformCXXNamedCastExpr(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXFunctionalCastExpr(
CXXFunctionalCastExpr *E) {
TypeSourceInfo *Type =
getDerived().TransformTypeWithDeducedTST(E->getTypeInfoAsWritten());
if (!Type)
return ExprError();
ExprResult SubExpr
= getDerived().TransformExpr(E->getSubExprAsWritten());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeInfoAsWritten() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXFunctionalCastExpr(Type,
E->getLParenLoc(),
SubExpr.get(),
E->getRParenLoc(),
E->isListInitialization());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXTypeidExpr(CXXTypeidExpr *E) {
if (E->isTypeOperand()) {
TypeSourceInfo *TInfo
= getDerived().TransformType(E->getTypeOperandSourceInfo());
if (!TInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getTypeOperandSourceInfo())
return E;
return getDerived().RebuildCXXTypeidExpr(E->getType(), E->getBeginLoc(),
TInfo, E->getEndLoc());
}
// The operand of typeid is unevaluated unless it is a glvalue of polymorphic
// class type. We must not unilaterally enter an unevaluated context here, as
// then semantic processing can re-transform an already-transformed operand.
Expr *Op = E->getExprOperand();
auto EvalCtx = Sema::ExpressionEvaluationContext::Unevaluated;
if (E->isGLValue())
if (auto *RecordT = Op->getType()->getAs<RecordType>())
if (cast<CXXRecordDecl>(RecordT->getDecl())->isPolymorphic())
EvalCtx = SemaRef.ExprEvalContexts.back().Context;
EnterExpressionEvaluationContext Unevaluated(SemaRef, EvalCtx,
Sema::ReuseLambdaContextDecl);
ExprResult SubExpr = getDerived().TransformExpr(Op);
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getExprOperand())
return E;
return getDerived().RebuildCXXTypeidExpr(E->getType(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXUuidofExpr(CXXUuidofExpr *E) {
if (E->isTypeOperand()) {
TypeSourceInfo *TInfo
= getDerived().TransformType(E->getTypeOperandSourceInfo());
if (!TInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TInfo == E->getTypeOperandSourceInfo())
return E;
return getDerived().RebuildCXXUuidofExpr(E->getType(), E->getBeginLoc(),
TInfo, E->getEndLoc());
}
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubExpr = getDerived().TransformExpr(E->getExprOperand());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getExprOperand())
return E;
return getDerived().RebuildCXXUuidofExpr(E->getType(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNullPtrLiteralExpr(
CXXNullPtrLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXThisExpr(CXXThisExpr *E) {
QualType T = getSema().getCurrentThisType();
if (!getDerived().AlwaysRebuild() && T == E->getType()) {
// Mark it referenced in the new context regardless.
// FIXME: this is a bit instantiation-specific.
getSema().MarkThisReferenced(E);
return E;
}
return getDerived().RebuildCXXThisExpr(E->getBeginLoc(), T, E->isImplicit());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXThrowExpr(CXXThrowExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildCXXThrowExpr(E->getThrowLoc(), SubExpr.get(),
E->isThrownVariableInScope());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
ParmVarDecl *Param = cast_or_null<ParmVarDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getParam()));
if (!Param)
return ExprError();
if (!getDerived().AlwaysRebuild() && Param == E->getParam() &&
E->getUsedContext() == SemaRef.CurContext)
return E;
return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDefaultInitExpr(CXXDefaultInitExpr *E) {
FieldDecl *Field = cast_or_null<FieldDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getField()));
if (!Field)
return ExprError();
if (!getDerived().AlwaysRebuild() && Field == E->getField() &&
E->getUsedContext() == SemaRef.CurContext)
return E;
return getDerived().RebuildCXXDefaultInitExpr(E->getExprLoc(), Field);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXScalarValueInitExpr(
CXXScalarValueInitExpr *E) {
TypeSourceInfo *T = getDerived().TransformType(E->getTypeSourceInfo());
if (!T)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo())
return E;
return getDerived().RebuildCXXScalarValueInitExpr(T,
/*FIXME:*/T->getTypeLoc().getEndLoc(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNewExpr(CXXNewExpr *E) {
// Transform the type that we're allocating
TypeSourceInfo *AllocTypeInfo =
getDerived().TransformTypeWithDeducedTST(E->getAllocatedTypeSourceInfo());
if (!AllocTypeInfo)
return ExprError();
// Transform the size of the array we're allocating (if any).
Optional<Expr *> ArraySize;
if (Optional<Expr *> OldArraySize = E->getArraySize()) {
ExprResult NewArraySize;
if (*OldArraySize) {
NewArraySize = getDerived().TransformExpr(*OldArraySize);
if (NewArraySize.isInvalid())
return ExprError();
}
ArraySize = NewArraySize.get();
}
// Transform the placement arguments (if any).
bool ArgumentChanged = false;
SmallVector<Expr*, 8> PlacementArgs;
if (getDerived().TransformExprs(E->getPlacementArgs(),
E->getNumPlacementArgs(), true,
PlacementArgs, &ArgumentChanged))
return ExprError();
// Transform the initializer (if any).
Expr *OldInit = E->getInitializer();
ExprResult NewInit;
if (OldInit)
NewInit = getDerived().TransformInitializer(OldInit, true);
if (NewInit.isInvalid())
return ExprError();
// Transform new operator and delete operator.
FunctionDecl *OperatorNew = nullptr;
if (E->getOperatorNew()) {
OperatorNew = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorNew()));
if (!OperatorNew)
return ExprError();
}
FunctionDecl *OperatorDelete = nullptr;
if (E->getOperatorDelete()) {
OperatorDelete = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorDelete()));
if (!OperatorDelete)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
AllocTypeInfo == E->getAllocatedTypeSourceInfo() &&
ArraySize == E->getArraySize() &&
NewInit.get() == OldInit &&
OperatorNew == E->getOperatorNew() &&
OperatorDelete == E->getOperatorDelete() &&
!ArgumentChanged) {
// Mark any declarations we need as referenced.
// FIXME: instantiation-specific.
if (OperatorNew)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorNew);
if (OperatorDelete)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorDelete);
if (E->isArray() && !E->getAllocatedType()->isDependentType()) {
QualType ElementType
= SemaRef.Context.getBaseElementType(E->getAllocatedType());
if (const RecordType *RecordT = ElementType->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordT->getDecl());
if (CXXDestructorDecl *Destructor = SemaRef.LookupDestructor(Record)) {
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Destructor);
}
}
}
return E;
}
QualType AllocType = AllocTypeInfo->getType();
if (!ArraySize) {
// If no array size was specified, but the new expression was
// instantiated with an array type (e.g., "new T" where T is
// instantiated with "int[4]"), extract the outer bound from the
// array type as our array size. We do this with constant and
// dependently-sized array types.
const ArrayType *ArrayT = SemaRef.Context.getAsArrayType(AllocType);
if (!ArrayT) {
// Do nothing
} else if (const ConstantArrayType *ConsArrayT
= dyn_cast<ConstantArrayType>(ArrayT)) {
ArraySize = IntegerLiteral::Create(SemaRef.Context, ConsArrayT->getSize(),
SemaRef.Context.getSizeType(),
/*FIXME:*/ E->getBeginLoc());
AllocType = ConsArrayT->getElementType();
} else if (const DependentSizedArrayType *DepArrayT
= dyn_cast<DependentSizedArrayType>(ArrayT)) {
if (DepArrayT->getSizeExpr()) {
ArraySize = DepArrayT->getSizeExpr();
AllocType = DepArrayT->getElementType();
}
}
}
return getDerived().RebuildCXXNewExpr(
E->getBeginLoc(), E->isGlobalNew(),
/*FIXME:*/ E->getBeginLoc(), PlacementArgs,
/*FIXME:*/ E->getBeginLoc(), E->getTypeIdParens(), AllocType,
AllocTypeInfo, ArraySize, E->getDirectInitRange(), NewInit.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDeleteExpr(CXXDeleteExpr *E) {
ExprResult Operand = getDerived().TransformExpr(E->getArgument());
if (Operand.isInvalid())
return ExprError();
// Transform the delete operator, if known.
FunctionDecl *OperatorDelete = nullptr;
if (E->getOperatorDelete()) {
OperatorDelete = cast_or_null<FunctionDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getOperatorDelete()));
if (!OperatorDelete)
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
Operand.get() == E->getArgument() &&
OperatorDelete == E->getOperatorDelete()) {
// Mark any declarations we need as referenced.
// FIXME: instantiation-specific.
if (OperatorDelete)
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), OperatorDelete);
if (!E->getArgument()->isTypeDependent()) {
QualType Destroyed = SemaRef.Context.getBaseElementType(
E->getDestroyedType());
if (const RecordType *DestroyedRec = Destroyed->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(DestroyedRec->getDecl());
SemaRef.MarkFunctionReferenced(E->getBeginLoc(),
SemaRef.LookupDestructor(Record));
}
}
return E;
}
return getDerived().RebuildCXXDeleteExpr(
E->getBeginLoc(), E->isGlobalDelete(), E->isArrayForm(), Operand.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXPseudoDestructorExpr(
CXXPseudoDestructorExpr *E) {
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
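// Re-run the member-access analysis on the transformed base so we recover
// the object type used below to resolve the destroyed type name.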
ParsedType ObjectTypePtr;
bool MayBePseudoDestructor = false;
Base = SemaRef.ActOnStartCXXMemberReference(nullptr, Base.get(),
E->getOperatorLoc(),
E->isArrow()? tok::arrow : tok::period,
ObjectTypePtr,
MayBePseudoDestructor);
if (Base.isInvalid())
return ExprError();
QualType ObjectType = ObjectTypePtr.get();
NestedNameSpecifierLoc QualifierLoc = E->getQualifierLoc();
if (QualifierLoc) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(QualifierLoc, ObjectType);
if (!QualifierLoc)
return ExprError();
}
CXXScopeSpec SS;
SS.Adopt(QualifierLoc);
PseudoDestructorTypeStorage Destroyed;
if (E->getDestroyedTypeInfo()) {
TypeSourceInfo *DestroyedTypeInfo
= getDerived().TransformTypeInObjectScope(E->getDestroyedTypeInfo(),
ObjectType, nullptr, SS);
if (!DestroyedTypeInfo)
return ExprError();
Destroyed = DestroyedTypeInfo;
} else if (!ObjectType.isNull() && ObjectType->isDependentType()) {
// We aren't likely to be able to resolve the identifier down to a type
// now anyway, so just retain the identifier.
Destroyed = PseudoDestructorTypeStorage(E->getDestroyedTypeIdentifier(),
E->getDestroyedTypeLoc());
} else {
// Look for a destructor known with the given name.
ParsedType T = SemaRef.getDestructorName(E->getTildeLoc(),
*E->getDestroyedTypeIdentifier(),
E->getDestroyedTypeLoc(),
/*Scope=*/nullptr,
SS, ObjectTypePtr,
false);
if (!T)
return ExprError();
Destroyed
= SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.GetTypeFromParser(T),
E->getDestroyedTypeLoc());
}
TypeSourceInfo *ScopeTypeInfo = nullptr;
if (E->getScopeTypeInfo()) {
CXXScopeSpec EmptySS;
ScopeTypeInfo = getDerived().TransformTypeInObjectScope(
E->getScopeTypeInfo(), ObjectType, nullptr, EmptySS);
if (!ScopeTypeInfo)
return ExprError();
}
return getDerived().RebuildCXXPseudoDestructorExpr(Base.get(),
E->getOperatorLoc(),
E->isArrow(),
SS,
ScopeTypeInfo,
E->getColonColonLoc(),
E->getTildeLoc(),
Destroyed);
}
template <typename Derived>
bool TreeTransform<Derived>::TransformOverloadExprDecls(OverloadExpr *Old,
bool RequiresADL,
LookupResult &R) {
// Transform all the decls.
bool AllEmptyPacks = true;
for (auto *OldD : Old->decls()) {
Decl *InstD = getDerived().TransformDecl(Old->getNameLoc(), OldD);
if (!InstD) {
// Silently ignore these if a UsingShadowDecl instantiated to nothing.
// This can happen because of dependent hiding.
if (isa<UsingShadowDecl>(OldD))
continue;
else {
R.clear();
return true;
}
}
// Expand using pack declarations.
NamedDecl *SingleDecl = cast<NamedDecl>(InstD);
ArrayRef<NamedDecl*> Decls = SingleDecl;
if (auto *UPD = dyn_cast<UsingPackDecl>(InstD))
Decls = UPD->expansions();
// Expand using declarations.
for (auto *D : Decls) {
if (auto *UD = dyn_cast<UsingDecl>(D)) {
for (auto *SD : UD->shadows())
R.addDecl(SD);
} else {
R.addDecl(D);
}
}
AllEmptyPacks &= Decls.empty();
}
// C++ [temp.res]/8.4.2:
// The program is ill-formed, no diagnostic required, if [...] lookup for
// a name in the template definition found a using-declaration, but the
// lookup in the corresponding scope in the instantiation does not find
// any declarations because the using-declaration was a pack expansion and
// the corresponding pack is empty.
if (AllEmptyPacks && !RequiresADL) {
getSema().Diag(Old->getNameLoc(), diag::err_using_pack_expansion_empty)
<< isa<UnresolvedMemberExpr>(Old) << Old->getName();
return true;
}
// Resolve a kind, but don't do any further analysis. If it's
// ambiguous, the callee needs to deal with it.
R.resolveKind();
return false;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformUnresolvedLookupExpr(
UnresolvedLookupExpr *Old) {
LookupResult R(SemaRef, Old->getName(), Old->getNameLoc(),
Sema::LookupOrdinaryName);
// Transform the declaration set.
if (TransformOverloadExprDecls(Old, Old->requiresADL(), R))
return ExprError();
// Rebuild the nested-name qualifier, if present.
CXXScopeSpec SS;
if (Old->getQualifierLoc()) {
NestedNameSpecifierLoc QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(Old->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
SS.Adopt(QualifierLoc);
}
if (Old->getNamingClass()) {
CXXRecordDecl *NamingClass
= cast_or_null<CXXRecordDecl>(getDerived().TransformDecl(
Old->getNameLoc(),
Old->getNamingClass()));
if (!NamingClass) {
R.clear();
return ExprError();
}
R.setNamingClass(NamingClass);
}
SourceLocation TemplateKWLoc = Old->getTemplateKeywordLoc();
// If we have neither explicit template arguments, nor the template keyword,
// it's a normal declaration name or member reference.
if (!Old->hasExplicitTemplateArgs() && !TemplateKWLoc.isValid()) {
NamedDecl *D = R.getAsSingle<NamedDecl>();
// In a C++11 unevaluated context, an UnresolvedLookupExpr might refer to an
// instance member. In other contexts, BuildPossibleImplicitMemberExpr will
// give a good diagnostic.
if (D && D->isCXXInstanceMember()) {
return SemaRef.BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R,
/*TemplateArgs=*/nullptr,
/*Scope=*/nullptr);
}
return getDerived().RebuildDeclarationNameExpr(SS, R, Old->requiresADL());
}
// If we have template arguments, rebuild them, then rebuild the
// templateid expression.
TemplateArgumentListInfo TransArgs(Old->getLAngleLoc(), Old->getRAngleLoc());
if (Old->hasExplicitTemplateArgs() &&
getDerived().TransformTemplateArguments(Old->getTemplateArgs(),
Old->getNumTemplateArgs(),
TransArgs)) {
R.clear();
return ExprError();
}
return getDerived().RebuildTemplateIdExpr(SS, TemplateKWLoc, R,
Old->requiresADL(), &TransArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformTypeTraitExpr(TypeTraitExpr *E) {
bool ArgChanged = false;
SmallVector<TypeSourceInfo *, 4> Args;
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I) {
TypeSourceInfo *From = E->getArg(I);
TypeLoc FromTL = From->getTypeLoc();
if (!FromTL.getAs<PackExpansionTypeLoc>()) {
TypeLocBuilder TLB;
TLB.reserve(FromTL.getFullDataSize());
QualType To = getDerived().TransformType(TLB, FromTL);
if (To.isNull())
return ExprError();
if (To == From->getType())
Args.push_back(From);
else {
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
ArgChanged = true;
}
continue;
}
ArgChanged = true;
// We have a pack expansion. Instantiate it.
PackExpansionTypeLoc ExpansionTL = FromTL.castAs<PackExpansionTypeLoc>();
TypeLoc PatternTL = ExpansionTL.getPatternLoc();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(PatternTL, Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions =
ExpansionTL.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(ExpansionTL.getEllipsisLoc(),
PatternTL.getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return ExprError();
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
TypeLocBuilder TLB;
TLB.reserve(From->getTypeLoc().getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
continue;
}
// Expand the pack expansion by substituting for each argument in the
// pack(s).
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I);
TypeLocBuilder TLB;
TLB.reserve(PatternTL.getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
if (To->containsUnexpandedParameterPack()) {
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
}
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
}
if (!RetainExpansion)
continue;
// If we're supposed to retain a pack expansion, do so by temporarily
// forgetting the partially-substituted parameter pack.
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
TypeLocBuilder TLB;
TLB.reserve(From->getTypeLoc().getFullDataSize());
QualType To = getDerived().TransformType(TLB, PatternTL);
if (To.isNull())
return ExprError();
To = getDerived().RebuildPackExpansionType(To,
PatternTL.getSourceRange(),
ExpansionTL.getEllipsisLoc(),
NumExpansions);
if (To.isNull())
return ExprError();
PackExpansionTypeLoc ToExpansionTL
= TLB.push<PackExpansionTypeLoc>(To);
ToExpansionTL.setEllipsisLoc(ExpansionTL.getEllipsisLoc());
Args.push_back(TLB.getTypeSourceInfo(SemaRef.Context, To));
}
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return E;
return getDerived().RebuildTypeTrait(E->getTrait(), E->getBeginLoc(), Args,
E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConceptSpecializationExpr(
ConceptSpecializationExpr *E) {
const ASTTemplateArgumentListInfo *Old = E->getTemplateArgsAsWritten();
TemplateArgumentListInfo TransArgs(Old->LAngleLoc, Old->RAngleLoc);
if (getDerived().TransformTemplateArguments(Old->getTemplateArgs(),
Old->NumTemplateArgs, TransArgs))
return ExprError();
return getDerived().RebuildConceptSpecializationExpr(
E->getNestedNameSpecifierLoc(), E->getTemplateKWLoc(),
E->getConceptNameInfo(), E->getFoundDecl(), E->getNamedConcept(),
&TransArgs);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformRequiresExpr(RequiresExpr *E) {
SmallVector<ParmVarDecl*, 4> TransParams;
SmallVector<QualType, 4> TransParamTypes;
Sema::ExtParameterInfoBuilder ExtParamInfos;
// C++2a [expr.prim.req]p2
// Expressions appearing within a requirement-body are unevaluated operands.
EnterExpressionEvaluationContext Ctx(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
RequiresExprBodyDecl *Body = RequiresExprBodyDecl::Create(
getSema().Context, getSema().CurContext,
E->getBody()->getBeginLoc());
Sema::ContextRAII SavedContext(getSema(), Body, /*NewThisContext*/false);
if (getDerived().TransformFunctionTypeParams(E->getRequiresKWLoc(),
E->getLocalParameters(),
/*ParamTypes=*/nullptr,
/*ParamInfos=*/nullptr,
TransParamTypes, &TransParams,
ExtParamInfos))
return ExprError();
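// Reparent the transformed local parameters into the new requires-expression
// body so they end up in the correct declaration context.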
for (ParmVarDecl *Param : TransParams)
Param->setDeclContext(Body);
SmallVector<concepts::Requirement *, 4> TransReqs;
if (getDerived().TransformRequiresExprRequirements(E->getRequirements(),
TransReqs))
return ExprError();
for (concepts::Requirement *Req : TransReqs) {
if (auto *ER = dyn_cast<concepts::ExprRequirement>(Req)) {
if (ER->getReturnTypeRequirement().isTypeConstraint()) {
ER->getReturnTypeRequirement()
.getTypeConstraintTemplateParameterList()->getParam(0)
->setDeclContext(Body);
}
}
}
return getDerived().RebuildRequiresExpr(E->getRequiresKWLoc(), Body,
TransParams, TransReqs,
E->getRBraceLoc());
}
template<typename Derived>
bool TreeTransform<Derived>::TransformRequiresExprRequirements(
ArrayRef<concepts::Requirement *> Reqs,
SmallVectorImpl<concepts::Requirement *> &Transformed) {
for (concepts::Requirement *Req : Reqs) {
concepts::Requirement *TransReq = nullptr;
if (auto *TypeReq = dyn_cast<concepts::TypeRequirement>(Req))
TransReq = getDerived().TransformTypeRequirement(TypeReq);
else if (auto *ExprReq = dyn_cast<concepts::ExprRequirement>(Req))
TransReq = getDerived().TransformExprRequirement(ExprReq);
else
TransReq = getDerived().TransformNestedRequirement(
cast<concepts::NestedRequirement>(Req));
if (!TransReq)
return true;
Transformed.push_back(TransReq);
}
return false;
}
template<typename Derived>
concepts::TypeRequirement *
TreeTransform<Derived>::TransformTypeRequirement(
concepts::TypeRequirement *Req) {
if (Req->isSubstitutionFailure()) {
if (getDerived().AlwaysRebuild())
return getDerived().RebuildTypeRequirement(
Req->getSubstitutionDiagnostic());
return Req;
}
TypeSourceInfo *TransType = getDerived().TransformType(Req->getType());
if (!TransType)
return nullptr;
return getDerived().RebuildTypeRequirement(TransType);
}
template<typename Derived>
concepts::ExprRequirement *
TreeTransform<Derived>::TransformExprRequirement(concepts::ExprRequirement *Req) {
llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *> TransExpr;
if (Req->isExprSubstitutionFailure())
TransExpr = Req->getExprSubstitutionDiagnostic();
else {
ExprResult TransExprRes = getDerived().TransformExpr(Req->getExpr());
if (TransExprRes.isInvalid())
return nullptr;
TransExpr = TransExprRes.get();
}
llvm::Optional<concepts::ExprRequirement::ReturnTypeRequirement> TransRetReq;
const auto &RetReq = Req->getReturnTypeRequirement();
if (RetReq.isEmpty())
TransRetReq.emplace();
else if (RetReq.isSubstitutionFailure())
TransRetReq.emplace(RetReq.getSubstitutionDiagnostic());
else if (RetReq.isTypeConstraint()) {
TemplateParameterList *OrigTPL =
RetReq.getTypeConstraintTemplateParameterList();
TemplateParameterList *TPL =
getDerived().TransformTemplateParameterList(OrigTPL);
if (!TPL)
return nullptr;
TransRetReq.emplace(TPL);
}
assert(TransRetReq.hasValue() &&
"All code paths leading here must set TransRetReq");
if (Expr *E = TransExpr.dyn_cast<Expr *>())
return getDerived().RebuildExprRequirement(E, Req->isSimple(),
Req->getNoexceptLoc(),
std::move(*TransRetReq));
return getDerived().RebuildExprRequirement(
TransExpr.get<concepts::Requirement::SubstitutionDiagnostic *>(),
Req->isSimple(), Req->getNoexceptLoc(), std::move(*TransRetReq));
}
template<typename Derived>
concepts::NestedRequirement *
TreeTransform<Derived>::TransformNestedRequirement(
concepts::NestedRequirement *Req) {
if (Req->isSubstitutionFailure()) {
if (getDerived().AlwaysRebuild())
return getDerived().RebuildNestedRequirement(
Req->getSubstitutionDiagnostic());
return Req;
}
ExprResult TransConstraint =
getDerived().TransformExpr(Req->getConstraintExpr());
if (TransConstraint.isInvalid())
return nullptr;
return getDerived().RebuildNestedRequirement(TransConstraint.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
TypeSourceInfo *T = getDerived().TransformType(E->getQueriedTypeSourceInfo());
if (!T)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getQueriedTypeSourceInfo())
return E;
ExprResult SubExpr;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
SubExpr = getDerived().TransformExpr(E->getDimensionExpression());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getDimensionExpression())
return E;
}
return getDerived().RebuildArrayTypeTrait(E->getTrait(), E->getBeginLoc(), T,
SubExpr.get(), E->getEndLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExpressionTraitExpr(ExpressionTraitExpr *E) {
ExprResult SubExpr;
{
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
SubExpr = getDerived().TransformExpr(E->getQueriedExpression());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getQueriedExpression())
return E;
}
return getDerived().RebuildExpressionTrait(E->getTrait(), E->getBeginLoc(),
SubExpr.get(), E->getEndLoc());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformParenDependentScopeDeclRefExpr(
ParenExpr *PE, DependentScopeDeclRefExpr *DRE, bool AddrTaken,
TypeSourceInfo **RecoveryTSI) {
ExprResult NewDRE = getDerived().TransformDependentScopeDeclRefExpr(
DRE, AddrTaken, RecoveryTSI);
// Propagate both errors and recovered types, which return ExprEmpty.
if (!NewDRE.isUsable())
return NewDRE;
// We got an expr, wrap it up in parens.
if (!getDerived().AlwaysRebuild() && NewDRE.get() == DRE)
return PE;
return getDerived().RebuildParenExpr(NewDRE.get(), PE->getLParen(),
PE->getRParen());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformDependentScopeDeclRefExpr(
DependentScopeDeclRefExpr *E) {
return TransformDependentScopeDeclRefExpr(E, /*IsAddressOfOperand=*/false,
nullptr);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformDependentScopeDeclRefExpr(
DependentScopeDeclRefExpr *E, bool IsAddressOfOperand,
TypeSourceInfo **RecoveryTSI) {
assert(E->getQualifierLoc());
NestedNameSpecifierLoc QualifierLoc =
getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
// TODO: If this is a conversion-function-id, verify that the
// destination type name (if present) resolves the same way after
// instantiation as it did in the local scope.
DeclarationNameInfo NameInfo =
getDerived().TransformDeclarationNameInfo(E->getNameInfo());
if (!NameInfo.getName())
return ExprError();
if (!E->hasExplicitTemplateArgs()) {
if (!getDerived().AlwaysRebuild() && QualifierLoc == E->getQualifierLoc() &&
// Note: it is sufficient to compare the Name component of NameInfo:
// if name has not changed, DNLoc has not changed either.
NameInfo.getName() == E->getDeclName())
return E;
return getDerived().RebuildDependentScopeDeclRefExpr(
QualifierLoc, TemplateKWLoc, NameInfo, /*TemplateArgs=*/nullptr,
IsAddressOfOperand, RecoveryTSI);
}
TemplateArgumentListInfo TransArgs(E->getLAngleLoc(), E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(
E->getTemplateArgs(), E->getNumTemplateArgs(), TransArgs))
return ExprError();
return getDerived().RebuildDependentScopeDeclRefExpr(
QualifierLoc, TemplateKWLoc, NameInfo, &TransArgs, IsAddressOfOperand,
RecoveryTSI);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXConstructExpr(CXXConstructExpr *E) {
// CXXConstructExprs other than for list-initialization and
// CXXTemporaryObjectExpr are always implicit, so when we have
// a 1-argument construction we just transform that argument.
if (getDerived().AllowSkippingCXXConstructExpr() &&
((E->getNumArgs() == 1 ||
(E->getNumArgs() > 1 && getDerived().DropCallArgument(E->getArg(1)))) &&
(!getDerived().DropCallArgument(E->getArg(0))) &&
!E->isListInitialization()))
return getDerived().TransformInitializer(E->getArg(0),
/*DirectInit*/ false);
TemporaryBase Rebase(*this, /*FIXME*/ E->getBeginLoc(), DeclarationName());
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getType() &&
Constructor == E->getConstructor() &&
!ArgumentChanged) {
// Mark the constructor as referenced.
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return E;
}
return getDerived().RebuildCXXConstructExpr(
T, /*FIXME:*/ E->getBeginLoc(), Constructor, E->isElidable(), Args,
E->hadMultipleCandidates(), E->isListInitialization(),
E->isStdInitListInitialization(), E->requiresZeroInitialization(),
E->getConstructionKind(), E->getParenOrBraceRange());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::TransformCXXInheritedCtorInitExpr(
CXXInheritedCtorInitExpr *E) {
QualType T = getDerived().TransformType(E->getType());
if (T.isNull())
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
T == E->getType() &&
Constructor == E->getConstructor()) {
// Mark the constructor as referenced.
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return E;
}
return getDerived().RebuildCXXInheritedCtorInitExpr(
T, E->getLocation(), Constructor,
E->constructsVBase(), E->inheritedFromVBase());
}
/// Transform a C++ temporary-binding expression.
///
/// Since CXXBindTemporaryExpr nodes are implicitly generated, we just
/// transform the subexpression and return that.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
/// Transform a C++ expression that contains cleanups that should
/// be run after the expression is evaluated.
///
/// Since ExprWithCleanups nodes are implicitly generated, we
/// just transform the subexpression and return that.
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformExprWithCleanups(ExprWithCleanups *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXTemporaryObjectExpr(
CXXTemporaryObjectExpr *E) {
TypeSourceInfo *T =
getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo());
if (!T)
return ExprError();
CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
getDerived().TransformDecl(E->getBeginLoc(), E->getConstructor()));
if (!Constructor)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (TransformExprs(E->getArgs(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo() &&
Constructor == E->getConstructor() &&
!ArgumentChanged) {
// FIXME: Instantiation-specific
SemaRef.MarkFunctionReferenced(E->getBeginLoc(), Constructor);
return SemaRef.MaybeBindToTemporary(E);
}
// FIXME: We should just pass E->isListInitialization(), but we're not
// prepared to handle list-initialization without a child InitListExpr.
SourceLocation LParenLoc = T->getTypeLoc().getEndLoc();
return getDerived().RebuildCXXTemporaryObjectExpr(
T, LParenLoc, Args, E->getEndLoc(),
/*ListInitialization=*/LParenLoc.isInvalid());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
// Transform any init-capture expressions before entering the scope of the
// lambda body, because they are not semantically within that scope.
typedef std::pair<ExprResult, QualType> InitCaptureInfoTy;
struct TransformedInitCapture {
// The location of the ... if the result is retaining a pack expansion.
SourceLocation EllipsisLoc;
// Zero or more expansions of the init-capture.
SmallVector<InitCaptureInfoTy, 4> Expansions;
};
SmallVector<TransformedInitCapture, 4> InitCaptures;
InitCaptures.resize(E->explicit_capture_end() - E->explicit_capture_begin());
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
if (!E->isInitCapture(C))
continue;
TransformedInitCapture &Result = InitCaptures[C - E->capture_begin()];
VarDecl *OldVD = C->getCapturedVar();
auto SubstInitCapture = [&](SourceLocation EllipsisLoc,
Optional<unsigned> NumExpansions) {
ExprResult NewExprInitResult = getDerived().TransformInitializer(
OldVD->getInit(), OldVD->getInitStyle() == VarDecl::CallInit);
if (NewExprInitResult.isInvalid()) {
Result.Expansions.push_back(InitCaptureInfoTy(ExprError(), QualType()));
return;
}
Expr *NewExprInit = NewExprInitResult.get();
QualType NewInitCaptureType =
getSema().buildLambdaInitCaptureInitialization(
C->getLocation(), OldVD->getType()->isReferenceType(),
EllipsisLoc, NumExpansions, OldVD->getIdentifier(),
C->getCapturedVar()->getInitStyle() != VarDecl::CInit,
NewExprInit);
Result.Expansions.push_back(
InitCaptureInfoTy(NewExprInit, NewInitCaptureType));
};
// If this is an init-capture pack, consider expanding the pack now.
if (OldVD->isParameterPack()) {
PackExpansionTypeLoc ExpansionTL = OldVD->getTypeSourceInfo()
->getTypeLoc()
.castAs<PackExpansionTypeLoc>();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
SemaRef.collectUnexpandedParameterPacks(OldVD->getInit(), Unexpanded);
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions =
ExpansionTL.getTypePtr()->getNumExpansions();
Optional<unsigned> NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(
ExpansionTL.getEllipsisLoc(),
OldVD->getInit()->getSourceRange(), Unexpanded, Expand,
RetainExpansion, NumExpansions))
return ExprError();
if (Expand) {
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
SubstInitCapture(SourceLocation(), None);
}
}
if (!Expand || RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
SubstInitCapture(ExpansionTL.getEllipsisLoc(), NumExpansions);
Result.EllipsisLoc = ExpansionTL.getEllipsisLoc();
}
} else {
SubstInitCapture(SourceLocation(), None);
}
}
LambdaScopeInfo *LSI = getSema().PushLambdaScope();
Sema::FunctionScopeRAII FuncScopeCleanup(getSema());
// Transform the template parameters, and add them to the current
// instantiation scope. The null case is handled correctly.
auto TPL = getDerived().TransformTemplateParameterList(
E->getTemplateParameterList());
LSI->GLTemplateParameterList = TPL;
// Transform the type of the original lambda's call operator.
// The transformation MUST be done in the CurrentInstantiationScope since
// it introduces a mapping of the original to the newly created
// transformed parameters.
TypeSourceInfo *NewCallOpTSI = nullptr;
{
TypeSourceInfo *OldCallOpTSI = E->getCallOperator()->getTypeSourceInfo();
FunctionProtoTypeLoc OldCallOpFPTL =
OldCallOpTSI->getTypeLoc().getAs<FunctionProtoTypeLoc>();
TypeLocBuilder NewCallOpTLBuilder;
SmallVector<QualType, 4> ExceptionStorage;
TreeTransform *This = this; // Work around gcc.gnu.org/PR56135.
QualType NewCallOpType = TransformFunctionProtoType(
NewCallOpTLBuilder, OldCallOpFPTL, nullptr, Qualifiers(),
[&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) {
return This->TransformExceptionSpec(OldCallOpFPTL.getBeginLoc(), ESI,
ExceptionStorage, Changed);
});
if (NewCallOpType.isNull())
return ExprError();
NewCallOpTSI = NewCallOpTLBuilder.getTypeSourceInfo(getSema().Context,
NewCallOpType);
}
// Transform the trailing requires clause
ExprResult NewTrailingRequiresClause;
if (Expr *TRC = E->getCallOperator()->getTrailingRequiresClause())
// FIXME: Concepts: Substitution into requires clause should only happen
// when checking satisfaction.
NewTrailingRequiresClause = getDerived().TransformExpr(TRC);
// Create the local class that will describe the lambda.
// FIXME: KnownDependent below is wrong when substituting inside a templated
// context that isn't a DeclContext (such as a variable template).
CXXRecordDecl *OldClass = E->getLambdaClass();
CXXRecordDecl *Class
= getSema().createLambdaClosureType(E->getIntroducerRange(),
NewCallOpTSI,
/*KnownDependent=*/false,
E->getCaptureDefault());
getDerived().transformedLocalDecl(OldClass, {Class});
Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling;
if (getDerived().ReplacingOriginal())
Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(),
OldClass->getLambdaManglingNumber(),
OldClass->getDeviceLambdaManglingNumber(),
OldClass->getLambdaContextDecl());
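// Note: the saved numbering is reapplied below via handleLambdaNumbering so
// that the rebuilt closure type is numbered and mangled consistently with the
// original.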
// Build the call operator.
CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition(
Class, E->getIntroducerRange(), NewCallOpTSI,
E->getCallOperator()->getEndLoc(),
NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(),
E->getCallOperator()->getConstexprKind(),
NewTrailingRequiresClause.get());
LSI->CallOperator = NewCallOperator;
getDerived().transformAttrs(E->getCallOperator(), NewCallOperator);
getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator});
// Number the lambda for linkage purposes if necessary.
getSema().handleLambdaNumbering(Class, NewCallOperator, Mangling);
// Introduce the context of the call operator.
Sema::ContextRAII SavedContext(getSema(), NewCallOperator,
/*NewThisContext*/false);
// Enter the scope of the lambda.
getSema().buildLambdaScope(LSI, NewCallOperator,
E->getIntroducerRange(),
E->getCaptureDefault(),
E->getCaptureDefaultLoc(),
E->hasExplicitParameters(),
E->hasExplicitResultType(),
E->isMutable());
bool Invalid = false;
// Transform captures.
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
// When we hit the first implicit capture, tell Sema that we've finished
// the list of explicit captures.
if (C->isImplicit())
break;
// Capturing 'this' is trivial.
if (C->capturesThis()) {
getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
/*BuildAndDiagnose*/ true, nullptr,
C->getCaptureKind() == LCK_StarThis);
continue;
}
// Captured expression will be recaptured during captured variables
// rebuilding.
if (C->capturesVLAType())
continue;
// Rebuild init-captures, including the implied field declaration.
if (E->isInitCapture(C)) {
TransformedInitCapture &NewC = InitCaptures[C - E->capture_begin()];
VarDecl *OldVD = C->getCapturedVar();
llvm::SmallVector<Decl*, 4> NewVDs;
for (InitCaptureInfoTy &Info : NewC.Expansions) {
ExprResult Init = Info.first;
QualType InitQualType = Info.second;
if (Init.isInvalid() || InitQualType.isNull()) {
Invalid = true;
break;
}
VarDecl *NewVD = getSema().createLambdaInitCaptureVarDecl(
OldVD->getLocation(), InitQualType, NewC.EllipsisLoc,
OldVD->getIdentifier(), OldVD->getInitStyle(), Init.get());
if (!NewVD) {
Invalid = true;
break;
}
NewVDs.push_back(NewVD);
getSema().addInitCapture(LSI, NewVD);
}
if (Invalid)
break;
getDerived().transformedLocalDecl(OldVD, NewVDs);
continue;
}
assert(C->capturesVariable() && "unexpected kind of lambda capture");
// Determine the capture kind for Sema.
Sema::TryCaptureKind Kind
= C->isImplicit()? Sema::TryCapture_Implicit
: C->getCaptureKind() == LCK_ByCopy
? Sema::TryCapture_ExplicitByVal
: Sema::TryCapture_ExplicitByRef;
SourceLocation EllipsisLoc;
if (C->isPackExpansion()) {
UnexpandedParameterPack Unexpanded(C->getCapturedVar(), C->getLocation());
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (getDerived().TryExpandParameterPacks(C->getEllipsisLoc(),
C->getLocation(),
Unexpanded,
ShouldExpand, RetainExpansion,
NumExpansions)) {
Invalid = true;
continue;
}
if (ShouldExpand) {
// The transform has determined that we should perform an elementwise
// expansion of the pattern; transform and capture each of the arguments.
VarDecl *Pack = C->getCapturedVar();
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
VarDecl *CapturedVar
= cast_or_null<VarDecl>(getDerived().TransformDecl(C->getLocation(),
Pack));
if (!CapturedVar) {
Invalid = true;
continue;
}
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind);
}
// FIXME: Retain a pack expansion if RetainExpansion is true.
continue;
}
EllipsisLoc = C->getEllipsisLoc();
}
// Transform the captured variable.
VarDecl *CapturedVar
= cast_or_null<VarDecl>(getDerived().TransformDecl(C->getLocation(),
C->getCapturedVar()));
if (!CapturedVar || CapturedVar->isInvalidDecl()) {
Invalid = true;
continue;
}
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
EllipsisLoc);
}
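// Implicit captures are deliberately not handled in the loop above; they are
// recomputed as the body is transformed and captured variables are referenced.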
getSema().finishLambdaExplicitCaptures(LSI);
// FIXME: Sema's lambda-building mechanism expects us to push an expression
// evaluation context even if we're not transforming the function body.
getSema().PushExpressionEvaluationContext(
Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
// Instantiate the body of the lambda expression.
StmtResult Body =
Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
// ActOnLambda* will pop the function scope for us.
FuncScopeCleanup.disable();
if (Body.isInvalid()) {
SavedContext.pop();
getSema().ActOnLambdaError(E->getBeginLoc(), /*CurScope=*/nullptr,
/*IsInstantiation=*/true);
return ExprError();
}
// Copy the LSI before ActOnFinishFunctionBody removes it.
// FIXME: This is dumb. Store the lambda information somewhere that outlives
// the call operator.
auto LSICopy = *LSI;
getSema().ActOnFinishFunctionBody(NewCallOperator, Body.get(),
/*IsInstantiation*/ true);
SavedContext.pop();
return getSema().BuildLambdaExpr(E->getBeginLoc(), Body.get()->getEndLoc(),
&LSICopy);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformLambdaBody(LambdaExpr *E, Stmt *S) {
return TransformStmt(S);
}
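// SkipLambdaBody re-establishes the lambda's captures without transforming the
// body statement; it is intended for derived transforms that need the captures
// but can reuse the body as-is.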
template<typename Derived>
StmtResult
TreeTransform<Derived>::SkipLambdaBody(LambdaExpr *E, Stmt *S) {
// Transform captures.
for (LambdaExpr::capture_iterator C = E->capture_begin(),
CEnd = E->capture_end();
C != CEnd; ++C) {
// Only implicit captures need to be handled here; explicit captures were
// already processed when the enclosing lambda expression was transformed.
if (!C->isImplicit())
continue;
// Capturing 'this' is trivial.
if (C->capturesThis()) {
getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
/*BuildAndDiagnose*/ true, nullptr,
C->getCaptureKind() == LCK_StarThis);
continue;
}
// Captured expression will be recaptured during captured variables
// rebuilding.
if (C->capturesVLAType())
continue;
assert(C->capturesVariable() && "unexpected kind of lambda capture");
assert(!E->isInitCapture(C) && "implicit init-capture?");
// Transform the captured variable.
VarDecl *CapturedVar = cast_or_null<VarDecl>(
getDerived().TransformDecl(C->getLocation(), C->getCapturedVar()));
if (!CapturedVar || CapturedVar->isInvalidDecl())
return StmtError();
// Capture the transformed variable.
getSema().tryCaptureVariable(CapturedVar, C->getLocation());
}
return S;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(
CXXUnresolvedConstructExpr *E) {
TypeSourceInfo *T =
getDerived().TransformTypeWithDeducedTST(E->getTypeSourceInfo());
if (!T)
return ExprError();
bool ArgumentChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
{
EnterExpressionEvaluationContext Context(
getSema(), EnterExpressionEvaluationContext::InitList,
E->isListInitialization());
if (getDerived().TransformExprs(E->arg_begin(), E->getNumArgs(), true, Args,
&ArgumentChanged))
return ExprError();
}
if (!getDerived().AlwaysRebuild() &&
T == E->getTypeSourceInfo() &&
!ArgumentChanged)
return E;
// FIXME: we're faking the locations of the commas
return getDerived().RebuildCXXUnresolvedConstructExpr(
T, E->getLParenLoc(), Args, E->getRParenLoc(), E->isListInitialization());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXDependentScopeMemberExpr(
CXXDependentScopeMemberExpr *E) {
// Transform the base of the expression.
ExprResult Base((Expr*) nullptr);
Expr *OldBase;
QualType BaseType;
QualType ObjectType;
if (!E->isImplicitAccess()) {
OldBase = E->getBase();
Base = getDerived().TransformExpr(OldBase);
if (Base.isInvalid())
return ExprError();
// Start the member reference and compute the object's type.
ParsedType ObjectTy;
bool MayBePseudoDestructor = false;
Base = SemaRef.ActOnStartCXXMemberReference(nullptr, Base.get(),
E->getOperatorLoc(),
E->isArrow()? tok::arrow : tok::period,
ObjectTy,
MayBePseudoDestructor);
if (Base.isInvalid())
return ExprError();
ObjectType = ObjectTy.get();
BaseType = ((Expr*) Base.get())->getType();
} else {
OldBase = nullptr;
BaseType = getDerived().TransformType(E->getBaseType());
ObjectType = BaseType->castAs<PointerType>()->getPointeeType();
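// For an implicit member access the stored base type is the type of 'this',
// i.e. a pointer to the enclosing class, so the object type is its pointee.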
}
// Transform the first part of the nested-name-specifier that qualifies
// the member name.
NamedDecl *FirstQualifierInScope
= getDerived().TransformFirstQualifierInScope(
E->getFirstQualifierFoundInScope(),
E->getQualifierLoc().getBeginLoc());
NestedNameSpecifierLoc QualifierLoc;
if (E->getQualifier()) {
QualifierLoc
= getDerived().TransformNestedNameSpecifierLoc(E->getQualifierLoc(),
ObjectType,
FirstQualifierInScope);
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = E->getTemplateKeywordLoc();
// TODO: If this is a conversion-function-id, verify that the
// destination type name (if present) resolves the same way after
// instantiation as it did in the local scope.
DeclarationNameInfo NameInfo
= getDerived().TransformDeclarationNameInfo(E->getMemberNameInfo());
if (!NameInfo.getName())
return ExprError();
if (!E->hasExplicitTemplateArgs()) {
// This is a reference to a member without an explicitly-specified
// template argument list. Optimize for this common case.
if (!getDerived().AlwaysRebuild() &&
Base.get() == OldBase &&
BaseType == E->getBaseType() &&
QualifierLoc == E->getQualifierLoc() &&
NameInfo.getName() == E->getMember() &&
FirstQualifierInScope == E->getFirstQualifierFoundInScope())
return E;
return getDerived().RebuildCXXDependentScopeMemberExpr(Base.get(),
BaseType,
E->isArrow(),
E->getOperatorLoc(),
QualifierLoc,
TemplateKWLoc,
FirstQualifierInScope,
NameInfo,
/*TemplateArgs*/nullptr);
}
TemplateArgumentListInfo TransArgs(E->getLAngleLoc(), E->getRAngleLoc());
if (getDerived().TransformTemplateArguments(E->getTemplateArgs(),
E->getNumTemplateArgs(),
TransArgs))
return ExprError();
return getDerived().RebuildCXXDependentScopeMemberExpr(Base.get(),
BaseType,
E->isArrow(),
E->getOperatorLoc(),
QualifierLoc,
TemplateKWLoc,
FirstQualifierInScope,
NameInfo,
&TransArgs);
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformUnresolvedMemberExpr(
UnresolvedMemberExpr *Old) {
// Transform the base of the expression.
ExprResult Base((Expr *)nullptr);
QualType BaseType;
if (!Old->isImplicitAccess()) {
Base = getDerived().TransformExpr(Old->getBase());
if (Base.isInvalid())
return ExprError();
Base =
getSema().PerformMemberExprBaseConversion(Base.get(), Old->isArrow());
if (Base.isInvalid())
return ExprError();
BaseType = Base.get()->getType();
} else {
BaseType = getDerived().TransformType(Old->getBaseType());
}
NestedNameSpecifierLoc QualifierLoc;
if (Old->getQualifierLoc()) {
QualifierLoc =
getDerived().TransformNestedNameSpecifierLoc(Old->getQualifierLoc());
if (!QualifierLoc)
return ExprError();
}
SourceLocation TemplateKWLoc = Old->getTemplateKeywordLoc();
LookupResult R(SemaRef, Old->getMemberNameInfo(), Sema::LookupOrdinaryName);
// Transform the declaration set.
if (TransformOverloadExprDecls(Old, /*RequiresADL*/ false, R))
return ExprError();
// Determine the naming class.
if (Old->getNamingClass()) {
CXXRecordDecl *NamingClass = cast_or_null<CXXRecordDecl>(
getDerived().TransformDecl(Old->getMemberLoc(), Old->getNamingClass()));
if (!NamingClass)
return ExprError();
R.setNamingClass(NamingClass);
}
TemplateArgumentListInfo TransArgs;
if (Old->hasExplicitTemplateArgs()) {
TransArgs.setLAngleLoc(Old->getLAngleLoc());
TransArgs.setRAngleLoc(Old->getRAngleLoc());
if (getDerived().TransformTemplateArguments(
Old->getTemplateArgs(), Old->getNumTemplateArgs(), TransArgs))
return ExprError();
}
// FIXME: to do this check properly, we will need to preserve the
// first-qualifier-in-scope here, just in case we had a dependent
// base (and therefore couldn't do the check) and a
// nested-name-qualifier (and therefore could do the lookup).
NamedDecl *FirstQualifierInScope = nullptr;
return getDerived().RebuildUnresolvedMemberExpr(
Base.get(), BaseType, Old->getOperatorLoc(), Old->isArrow(), QualifierLoc,
TemplateKWLoc, FirstQualifierInScope, R,
(Old->hasExplicitTemplateArgs() ? &TransArgs : nullptr));
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXNoexceptExpr(CXXNoexceptExpr *E) {
EnterExpressionEvaluationContext Unevaluated(
SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult SubExpr = getDerived().TransformExpr(E->getOperand());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && SubExpr.get() == E->getOperand())
return E;
return getDerived().RebuildCXXNoexceptExpr(E->getSourceRange(),SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformPackExpansionExpr(PackExpansionExpr *E) {
ExprResult Pattern = getDerived().TransformExpr(E->getPattern());
if (Pattern.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() && Pattern.get() == E->getPattern())
return E;
return getDerived().RebuildPackExpansion(Pattern.get(), E->getEllipsisLoc(),
E->getNumExpansions());
}
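// sizeof...(Pack) can often be computed without substituting the whole pack:
// e.g. for sizeof...(Ts) with Ts partially substituted to <int, Us...>, the
// result is 1 plus the (possibly known) size of Us. The transform below first
// tries to count expansions directly and only falls back to substituting the
// arguments when a nested pack's size cannot be determined.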
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSizeOfPackExpr(SizeOfPackExpr *E) {
// If E is not value-dependent, then nothing will change when we transform it.
// Note: This is an instantiation-centric view.
if (!E->isValueDependent())
return E;
EnterExpressionEvaluationContext Unevaluated(
getSema(), Sema::ExpressionEvaluationContext::Unevaluated);
ArrayRef<TemplateArgument> PackArgs;
TemplateArgument ArgStorage;
// Find the argument list to transform.
if (E->isPartiallySubstituted()) {
PackArgs = E->getPartialArguments();
} else if (E->isValueDependent()) {
UnexpandedParameterPack Unexpanded(E->getPack(), E->getPackLoc());
bool ShouldExpand = false;
bool RetainExpansion = false;
Optional<unsigned> NumExpansions;
if (getDerived().TryExpandParameterPacks(E->getOperatorLoc(), E->getPackLoc(),
Unexpanded,
ShouldExpand, RetainExpansion,
NumExpansions))
return ExprError();
// If we need to expand the pack, build a template argument from it and
// expand that.
if (ShouldExpand) {
auto *Pack = E->getPack();
if (auto *TTPD = dyn_cast<TemplateTypeParmDecl>(Pack)) {
ArgStorage = getSema().Context.getPackExpansionType(
getSema().Context.getTypeDeclType(TTPD), None);
} else if (auto *TTPD = dyn_cast<TemplateTemplateParmDecl>(Pack)) {
ArgStorage = TemplateArgument(TemplateName(TTPD), None);
} else {
auto *VD = cast<ValueDecl>(Pack);
ExprResult DRE = getSema().BuildDeclRefExpr(
VD, VD->getType().getNonLValueExprType(getSema().Context),
VD->getType()->isReferenceType() ? VK_LValue : VK_PRValue,
E->getPackLoc());
if (DRE.isInvalid())
return ExprError();
ArgStorage = new (getSema().Context) PackExpansionExpr(
getSema().Context.DependentTy, DRE.get(), E->getPackLoc(), None);
}
PackArgs = ArgStorage;
}
}
// If we're not expanding the pack, just transform the decl.
if (!PackArgs.size()) {
auto *Pack = cast_or_null<NamedDecl>(
getDerived().TransformDecl(E->getPackLoc(), E->getPack()));
if (!Pack)
return ExprError();
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), Pack,
E->getPackLoc(),
E->getRParenLoc(), None, None);
}
// Try to compute the result without performing a partial substitution.
Optional<unsigned> Result = 0;
for (const TemplateArgument &Arg : PackArgs) {
if (!Arg.isPackExpansion()) {
Result = *Result + 1;
continue;
}
TemplateArgumentLoc ArgLoc;
InventTemplateArgumentLoc(Arg, ArgLoc);
// Find the pattern of the pack expansion.
SourceLocation Ellipsis;
Optional<unsigned> OrigNumExpansions;
TemplateArgumentLoc Pattern =
getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis,
OrigNumExpansions);
// Substitute under the pack expansion. Do not expand the pack (yet).
TemplateArgumentLoc OutPattern;
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
if (getDerived().TransformTemplateArgument(Pattern, OutPattern,
/*Uneval*/ true))
return true;
// See if we can determine the number of arguments from the result.
Optional<unsigned> NumExpansions =
getSema().getFullyPackExpandedSize(OutPattern.getArgument());
if (!NumExpansions) {
// No: we must be in an alias template expansion, and we're going to need
// to actually expand the packs.
Result = None;
break;
}
Result = *Result + *NumExpansions;
}
// Common case: we could determine the number of expansions without
// substituting.
if (Result)
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(),
E->getRParenLoc(), *Result, None);
TemplateArgumentListInfo TransformedPackArgs(E->getPackLoc(),
E->getPackLoc());
{
TemporaryBase Rebase(*this, E->getPackLoc(), getBaseEntity());
typedef TemplateArgumentLocInventIterator<
Derived, const TemplateArgument*> PackLocIterator;
if (TransformTemplateArguments(PackLocIterator(*this, PackArgs.begin()),
PackLocIterator(*this, PackArgs.end()),
TransformedPackArgs, /*Uneval*/true))
return ExprError();
}
// Check whether we managed to fully-expand the pack.
// FIXME: Is it possible for us to do so and not hit the early exit path?
SmallVector<TemplateArgument, 8> Args;
bool PartialSubstitution = false;
for (auto &Loc : TransformedPackArgs.arguments()) {
Args.push_back(Loc.getArgument());
if (Loc.getArgument().isPackExpansion())
PartialSubstitution = true;
}
if (PartialSubstitution)
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(),
E->getRParenLoc(), None, Args);
return getDerived().RebuildSizeOfPackExpr(E->getOperatorLoc(), E->getPack(),
E->getPackLoc(), E->getRParenLoc(),
Args.size(), None);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSubstNonTypeTemplateParmPackExpr(
SubstNonTypeTemplateParmPackExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformFunctionParmPackExpr(FunctionParmPackExpr *E) {
// Default behavior is to do nothing with this transformation.
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformMaterializeTemporaryExpr(
MaterializeTemporaryExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
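// Fold expressions are expanded elementwise into nested binary operations.
// For example, with args = {a, b, c}:
//   (args + ... + init)  // right fold: a + (b + (c + init))
//   (init + ... + args)  // left fold:  ((init + a) + b) + c
// The loop below builds the tree in the corresponding order, retaining a pack
// expansion for any element that still contains an unexpanded pack.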
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXFoldExpr(CXXFoldExpr *E) {
UnresolvedLookupExpr *Callee = nullptr;
if (Expr *OldCallee = E->getCallee()) {
ExprResult CalleeResult = getDerived().TransformExpr(OldCallee);
if (CalleeResult.isInvalid())
return ExprError();
Callee = cast<UnresolvedLookupExpr>(CalleeResult.get());
}
Expr *Pattern = E->getPattern();
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(Pattern, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can and should
// be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = E->getNumExpansions(),
NumExpansions = OrigNumExpansions;
if (getDerived().TryExpandParameterPacks(E->getEllipsisLoc(),
Pattern->getSourceRange(),
Unexpanded,
Expand, RetainExpansion,
NumExpansions))
return true;
if (!Expand) {
// Do not expand any packs here, just transform and rebuild a fold
// expression.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult LHS =
E->getLHS() ? getDerived().TransformExpr(E->getLHS()) : ExprResult();
if (LHS.isInvalid())
return true;
ExprResult RHS =
E->getRHS() ? getDerived().TransformExpr(E->getRHS()) : ExprResult();
if (RHS.isInvalid())
return true;
if (!getDerived().AlwaysRebuild() &&
LHS.get() == E->getLHS() && RHS.get() == E->getRHS())
return E;
return getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), LHS.get(), E->getOperator(),
E->getEllipsisLoc(), RHS.get(), E->getEndLoc(), NumExpansions);
}
// Formally a fold expression expands to nested parenthesized expressions.
// Enforce this limit to avoid creating trees so deep we can't safely traverse
// them.
if (NumExpansions && SemaRef.getLangOpts().BracketDepth < NumExpansions) {
SemaRef.Diag(E->getEllipsisLoc(),
clang::diag::err_fold_expression_limit_exceeded)
<< *NumExpansions << SemaRef.getLangOpts().BracketDepth
<< E->getSourceRange();
SemaRef.Diag(E->getEllipsisLoc(), diag::note_bracket_depth);
return ExprError();
}
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
ExprResult Result = getDerived().TransformExpr(E->getInit());
if (Result.isInvalid())
return true;
bool LeftFold = E->isLeftFold();
// If we're retaining an expansion for a right fold, it is the innermost
// component and takes the init (if any).
if (!LeftFold && RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), Out.get(), E->getOperator(),
E->getEllipsisLoc(), Result.get(), E->getEndLoc(), OrigNumExpansions);
if (Result.isInvalid())
return true;
}
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(
getSema(), LeftFold ? I : *NumExpansions - I - 1);
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
if (Out.get()->containsUnexpandedParameterPack()) {
// We still have a pack; retain a pack expansion for this slice.
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), LeftFold ? Result.get() : Out.get(),
E->getOperator(), E->getEllipsisLoc(),
LeftFold ? Out.get() : Result.get(), E->getEndLoc(),
OrigNumExpansions);
} else if (Result.isUsable()) {
// We've got down to a single element; build a binary operator.
Expr *LHS = LeftFold ? Result.get() : Out.get();
Expr *RHS = LeftFold ? Out.get() : Result.get();
if (Callee)
Result = getDerived().RebuildCXXOperatorCallExpr(
BinaryOperator::getOverloadedOperator(E->getOperator()),
E->getEllipsisLoc(), Callee, LHS, RHS);
else
Result = getDerived().RebuildBinaryOperator(E->getEllipsisLoc(),
E->getOperator(), LHS, RHS);
} else
Result = Out;
if (Result.isInvalid())
return true;
}
// If we're retaining an expansion for a left fold, it is the outermost
// component and takes the complete expansion so far as its init (if any).
if (LeftFold && RetainExpansion) {
ForgetPartiallySubstitutedPackRAII Forget(getDerived());
ExprResult Out = getDerived().TransformExpr(Pattern);
if (Out.isInvalid())
return true;
Result = getDerived().RebuildCXXFoldExpr(
Callee, E->getBeginLoc(), Result.get(), E->getOperator(),
E->getEllipsisLoc(), Out.get(), E->getEndLoc(), OrigNumExpansions);
if (Result.isInvalid())
return true;
}
// If we had no init and an empty pack, and we're not retaining an expansion,
// then produce a fallback value or error.
if (Result.isUnset())
return getDerived().RebuildEmptyCXXFoldExpr(E->getEllipsisLoc(),
E->getOperator());
return Result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformCXXStdInitializerListExpr(
CXXStdInitializerListExpr *E) {
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCStringLiteral(ObjCStringLiteral *E) {
return SemaRef.MaybeBindToTemporary(E);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCBoolLiteralExpr(ObjCBoolLiteralExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCBoxedExpr(ObjCBoxedExpr *E) {
ExprResult SubExpr = getDerived().TransformExpr(E->getSubExpr());
if (SubExpr.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
SubExpr.get() == E->getSubExpr())
return E;
return getDerived().RebuildObjCBoxedExpr(E->getSourceRange(), SubExpr.get());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCArrayLiteral(ObjCArrayLiteral *E) {
// Transform each of the elements.
SmallVector<Expr *, 8> Elements;
bool ArgChanged = false;
if (getDerived().TransformExprs(E->getElements(), E->getNumElements(),
/*IsCall=*/false, Elements, &ArgChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
return getDerived().RebuildObjCArrayLiteral(E->getSourceRange(),
Elements.data(),
Elements.size());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCDictionaryLiteral(
ObjCDictionaryLiteral *E) {
// Transform each of the elements.
SmallVector<ObjCDictionaryElement, 8> Elements;
bool ArgChanged = false;
for (unsigned I = 0, N = E->getNumElements(); I != N; ++I) {
ObjCDictionaryElement OrigElement = E->getKeyValueElement(I);
if (OrigElement.isPackExpansion()) {
// This key/value element is a pack expansion.
SmallVector<UnexpandedParameterPack, 2> Unexpanded;
getSema().collectUnexpandedParameterPacks(OrigElement.Key, Unexpanded);
getSema().collectUnexpandedParameterPacks(OrigElement.Value, Unexpanded);
assert(!Unexpanded.empty() && "Pack expansion without parameter packs?");
// Determine whether the set of unexpanded parameter packs can
// and should be expanded.
bool Expand = true;
bool RetainExpansion = false;
Optional<unsigned> OrigNumExpansions = OrigElement.NumExpansions;
Optional<unsigned> NumExpansions = OrigNumExpansions;
SourceRange PatternRange(OrigElement.Key->getBeginLoc(),
OrigElement.Value->getEndLoc());
if (getDerived().TryExpandParameterPacks(OrigElement.EllipsisLoc,
PatternRange, Unexpanded, Expand,
RetainExpansion, NumExpansions))
return ExprError();
if (!Expand) {
// The transform has determined that we should perform a simple
// transformation on the pack expansion, producing another pack
// expansion.
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1);
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
if (Key.get() != OrigElement.Key)
ArgChanged = true;
ExprResult Value = getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
if (Value.get() != OrigElement.Value)
ArgChanged = true;
ObjCDictionaryElement Expansion = {
Key.get(), Value.get(), OrigElement.EllipsisLoc, NumExpansions
};
Elements.push_back(Expansion);
continue;
}
// Record right away that the argument was changed. This needs
// to happen even if the array expands to nothing.
ArgChanged = true;
// The transform has determined that we should perform an elementwise
// expansion of the pattern. Do so.
for (unsigned I = 0; I != *NumExpansions; ++I) {
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), I);
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
ExprResult Value = getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
ObjCDictionaryElement Element = {
Key.get(), Value.get(), SourceLocation(), NumExpansions
};
// If any unexpanded parameter packs remain, we still have a
// pack expansion.
// FIXME: Can this really happen?
if (Key.get()->containsUnexpandedParameterPack() ||
Value.get()->containsUnexpandedParameterPack())
Element.EllipsisLoc = OrigElement.EllipsisLoc;
Elements.push_back(Element);
}
// FIXME: Retain a pack expansion if RetainExpansion is true.
// We've finished with this pack expansion.
continue;
}
// Transform and check key.
ExprResult Key = getDerived().TransformExpr(OrigElement.Key);
if (Key.isInvalid())
return ExprError();
if (Key.get() != OrigElement.Key)
ArgChanged = true;
// Transform and check value.
ExprResult Value
= getDerived().TransformExpr(OrigElement.Value);
if (Value.isInvalid())
return ExprError();
if (Value.get() != OrigElement.Value)
ArgChanged = true;
ObjCDictionaryElement Element = {
Key.get(), Value.get(), SourceLocation(), None
};
Elements.push_back(Element);
}
if (!getDerived().AlwaysRebuild() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
return getDerived().RebuildObjCDictionaryLiteral(E->getSourceRange(),
Elements);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCEncodeExpr(ObjCEncodeExpr *E) {
TypeSourceInfo *EncodedTypeInfo
= getDerived().TransformType(E->getEncodedTypeSourceInfo());
if (!EncodedTypeInfo)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
EncodedTypeInfo == E->getEncodedTypeSourceInfo())
return E;
return getDerived().RebuildObjCEncodeExpr(E->getAtLoc(),
EncodedTypeInfo,
E->getRParenLoc());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformObjCIndirectCopyRestoreExpr(ObjCIndirectCopyRestoreExpr *E) {
// This is a kind of implicit conversion, and it needs to get dropped
// and recomputed for the same general reasons that ImplicitCastExprs
// do, as well as a more specific one: this expression is only valid when
// it appears *immediately* as an argument expression.
return getDerived().TransformExpr(E->getSubExpr());
}
template<typename Derived>
ExprResult TreeTransform<Derived>::
TransformObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
TypeSourceInfo *TSInfo
= getDerived().TransformType(E->getTypeInfoAsWritten());
if (!TSInfo)
return ExprError();
ExprResult Result = getDerived().TransformExpr(E->getSubExpr());
if (Result.isInvalid())
return ExprError();
if (!getDerived().AlwaysRebuild() &&
TSInfo == E->getTypeInfoAsWritten() &&
Result.get() == E->getSubExpr())
return E;
return SemaRef.BuildObjCBridgedCast(E->getLParenLoc(), E->getBridgeKind(),
E->getBridgeKeywordLoc(), TSInfo,
Result.get());
}
template <typename Derived>
ExprResult TreeTransform<Derived>::TransformObjCAvailabilityCheckExpr(
ObjCAvailabilityCheckExpr *E) {
return E;
}
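// Message sends are rebuilt according to their receiver kind: class messages
// re-transform the receiver type, 'super' messages reuse the super location
// and receiver type, and instance messages transform the receiver expression.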
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCMessageExpr(ObjCMessageExpr *E) {
// Transform arguments.
bool ArgChanged = false;
SmallVector<Expr*, 8> Args;
Args.reserve(E->getNumArgs());
if (getDerived().TransformExprs(E->getArgs(), E->getNumArgs(), false, Args,
&ArgChanged))
return ExprError();
if (E->getReceiverKind() == ObjCMessageExpr::Class) {
// Class message: transform the receiver type.
TypeSourceInfo *ReceiverTypeInfo
= getDerived().TransformType(E->getClassReceiverTypeInfo());
if (!ReceiverTypeInfo)
return ExprError();
// If nothing changed, just retain the existing message send.
if (!getDerived().AlwaysRebuild() &&
ReceiverTypeInfo == E->getClassReceiverTypeInfo() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// Build a new class message send.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(ReceiverTypeInfo,
E->getSelector(),
SelLocs,
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
} else if (E->getReceiverKind() == ObjCMessageExpr::SuperClass ||
E->getReceiverKind() == ObjCMessageExpr::SuperInstance) {
if (!E->getMethodDecl())
return ExprError();
// Build a new class message send to 'super'.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(E->getSuperLoc(),
E->getSelector(),
SelLocs,
E->getReceiverType(),
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
}
// Instance message: transform the receiver
assert(E->getReceiverKind() == ObjCMessageExpr::Instance &&
"Only class and instance messages may be instantiated");
ExprResult Receiver
= getDerived().TransformExpr(E->getInstanceReceiver());
if (Receiver.isInvalid())
return ExprError();
// If nothing changed, just retain the existing message send.
if (!getDerived().AlwaysRebuild() &&
Receiver.get() == E->getInstanceReceiver() && !ArgChanged)
return SemaRef.MaybeBindToTemporary(E);
// Build a new instance message send.
SmallVector<SourceLocation, 16> SelLocs;
E->getSelectorLocs(SelLocs);
return getDerived().RebuildObjCMessageExpr(Receiver.get(),
E->getSelector(),
SelLocs,
E->getMethodDecl(),
E->getLeftLoc(),
Args,
E->getRightLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCSelectorExpr(ObjCSelectorExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCProtocolExpr(ObjCProtocolExpr *E) {
return E;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCIvarRefExpr(ObjCIvarRefExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// We don't need to transform the ivar; it will never change.
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
return getDerived().RebuildObjCIvarRefExpr(Base.get(), E->getDecl(),
E->getLocation(),
E->isArrow(), E->isFreeIvar());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
// 'super' and types never change. Property never changes. Just
// retain the existing expression.
if (!E->isObjectReceiver())
return E;
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// We don't need to transform the property; it will never change.
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
if (E->isExplicitProperty())
return getDerived().RebuildObjCPropertyRefExpr(Base.get(),
E->getExplicitProperty(),
E->getLocation());
return getDerived().RebuildObjCPropertyRefExpr(Base.get(),
SemaRef.Context.PseudoObjectTy,
E->getImplicitPropertyGetter(),
E->getImplicitPropertySetter(),
E->getLocation());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCSubscriptRefExpr(ObjCSubscriptRefExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBaseExpr());
if (Base.isInvalid())
return ExprError();
// Transform the key expression.
ExprResult Key = getDerived().TransformExpr(E->getKeyExpr());
if (Key.isInvalid())
return ExprError();
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Key.get() == E->getKeyExpr() && Base.get() == E->getBaseExpr())
return E;
return getDerived().RebuildObjCSubscriptRefExpr(E->getRBracket(),
Base.get(), Key.get(),
E->getAtIndexMethodDecl(),
E->setAtIndexMethodDecl());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCIsaExpr(ObjCIsaExpr *E) {
// Transform the base expression.
ExprResult Base = getDerived().TransformExpr(E->getBase());
if (Base.isInvalid())
return ExprError();
// If nothing changed, just retain the existing expression.
if (!getDerived().AlwaysRebuild() &&
Base.get() == E->getBase())
return E;
return getDerived().RebuildObjCIsaExpr(Base.get(), E->getIsaMemberLoc(),
E->getOpLoc(),
E->isArrow());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformShuffleVectorExpr(ShuffleVectorExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 8> SubExprs;
SubExprs.reserve(E->getNumSubExprs());
if (getDerived().TransformExprs(E->getSubExprs(), E->getNumSubExprs(), false,
SubExprs, &ArgumentChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
!ArgumentChanged)
return E;
return getDerived().RebuildShuffleVectorExpr(E->getBuiltinLoc(),
SubExprs,
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformConvertVectorExpr(ConvertVectorExpr *E) {
ExprResult SrcExpr = getDerived().TransformExpr(E->getSrcExpr());
if (SrcExpr.isInvalid())
return ExprError();
TypeSourceInfo *Type = getDerived().TransformType(E->getTypeSourceInfo());
if (!Type)
return ExprError();
if (!getDerived().AlwaysRebuild() &&
Type == E->getTypeSourceInfo() &&
SrcExpr.get() == E->getSrcExpr())
return E;
return getDerived().RebuildConvertVectorExpr(E->getBuiltinLoc(),
SrcExpr.get(), Type,
E->getRParenLoc());
}
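// Blocks are rebuilt by re-entering a block scope (ActOnBlockStart),
// substituting the parameter and return types, transforming the body, and
// then finishing the block via ActOnBlockStmtExpr; captures are recomputed
// as the body is transformed.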
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformBlockExpr(BlockExpr *E) {
BlockDecl *oldBlock = E->getBlockDecl();
SemaRef.ActOnBlockStart(E->getCaretLocation(), /*Scope=*/nullptr);
BlockScopeInfo *blockScope = SemaRef.getCurBlock();
blockScope->TheDecl->setIsVariadic(oldBlock->isVariadic());
blockScope->TheDecl->setBlockMissingReturnType(
oldBlock->blockMissingReturnType());
SmallVector<ParmVarDecl*, 4> params;
SmallVector<QualType, 4> paramTypes;
const FunctionProtoType *exprFunctionType = E->getFunctionType();
// Parameter substitution.
Sema::ExtParameterInfoBuilder extParamInfos;
if (getDerived().TransformFunctionTypeParams(
E->getCaretLocation(), oldBlock->parameters(), nullptr,
exprFunctionType->getExtParameterInfosOrNull(), paramTypes, &params,
extParamInfos)) {
getSema().ActOnBlockError(E->getCaretLocation(), /*Scope=*/nullptr);
return ExprError();
}
QualType exprResultType =
getDerived().TransformType(exprFunctionType->getReturnType());
auto epi = exprFunctionType->getExtProtoInfo();
epi.ExtParameterInfos = extParamInfos.getPointerOrNull(paramTypes.size());
QualType functionType =
getDerived().RebuildFunctionProtoType(exprResultType, paramTypes, epi);
blockScope->FunctionType = functionType;
// Set the parameters on the block decl.
if (!params.empty())
blockScope->TheDecl->setParams(params);
if (!oldBlock->blockMissingReturnType()) {
blockScope->HasImplicitReturnType = false;
blockScope->ReturnType = exprResultType;
}
// Transform the body
StmtResult body = getDerived().TransformStmt(E->getBody());
if (body.isInvalid()) {
getSema().ActOnBlockError(E->getCaretLocation(), /*Scope=*/nullptr);
return ExprError();
}
#ifndef NDEBUG
// In builds with assertions, make sure that we captured everything we
// captured before.
if (!SemaRef.getDiagnostics().hasErrorOccurred()) {
for (const auto &I : oldBlock->captures()) {
VarDecl *oldCapture = I.getVariable();
// Ignore parameter packs.
if (oldCapture->isParameterPack())
continue;
VarDecl *newCapture =
cast<VarDecl>(getDerived().TransformDecl(E->getCaretLocation(),
oldCapture));
assert(blockScope->CaptureMap.count(newCapture));
}
assert(oldBlock->capturesCXXThis() == blockScope->isCXXThisCaptured());
}
#endif
return SemaRef.ActOnBlockStmtExpr(E->getCaretLocation(), body.get(),
/*Scope=*/nullptr);
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAsTypeExpr(AsTypeExpr *E) {
ExprResult SrcExpr = getDerived().TransformExpr(E->getSrcExpr());
if (SrcExpr.isInvalid())
return ExprError();
QualType Type = getDerived().TransformType(E->getType());
return SemaRef.BuildAsTypeExpr(SrcExpr.get(), Type, E->getBuiltinLoc(),
E->getRParenLoc());
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformAtomicExpr(AtomicExpr *E) {
bool ArgumentChanged = false;
SmallVector<Expr*, 8> SubExprs;
SubExprs.reserve(E->getNumSubExprs());
if (getDerived().TransformExprs(E->getSubExprs(), E->getNumSubExprs(), false,
SubExprs, &ArgumentChanged))
return ExprError();
if (!getDerived().AlwaysRebuild() &&
!ArgumentChanged)
return E;
return getDerived().RebuildAtomicExpr(E->getBuiltinLoc(), SubExprs,
E->getOp(), E->getRParenLoc());
}
//===----------------------------------------------------------------------===//
// Type reconstruction
//===----------------------------------------------------------------------===//
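// The Rebuild* helpers below delegate to the corresponding Sema::Build* entry
// points, re-running semantic checking on the transformed components.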
template<typename Derived>
QualType TreeTransform<Derived>::RebuildPointerType(QualType PointeeType,
SourceLocation Star) {
return SemaRef.BuildPointerType(PointeeType, Star,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildBlockPointerType(QualType PointeeType,
SourceLocation Star) {
return SemaRef.BuildBlockPointerType(PointeeType, Star,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildReferenceType(QualType ReferentType,
bool WrittenAsLValue,
SourceLocation Sigil) {
return SemaRef.BuildReferenceType(ReferentType, WrittenAsLValue,
Sigil, getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildMemberPointerType(QualType PointeeType,
QualType ClassType,
SourceLocation Sigil) {
return SemaRef.BuildMemberPointerType(PointeeType, ClassType, Sigil,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCTypeParamType(
const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
return SemaRef.BuildObjCTypeParamType(Decl,
ProtocolLAngleLoc, Protocols,
ProtocolLocs, ProtocolRAngleLoc,
/*FailOnError=*/true);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCObjectType(
QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
return SemaRef.BuildObjCObjectType(BaseType, Loc, TypeArgsLAngleLoc,
TypeArgs, TypeArgsRAngleLoc,
ProtocolLAngleLoc, Protocols, ProtocolLocs,
ProtocolRAngleLoc,
/*FailOnError=*/true);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildObjCObjectPointerType(
QualType PointeeType,
SourceLocation Star) {
return SemaRef.Context.getObjCObjectPointerType(PointeeType);
}
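// RebuildArrayType synthesizes an IntegerLiteral for a known constant bound so
// that Sema::BuildArrayType can re-check it; the literal's type is chosen to
// match the bit width of the stored APInt size.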
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt *Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
if (SizeExpr || !Size)
return SemaRef.BuildArrayType(ElementType, SizeMod, SizeExpr,
IndexTypeQuals, BracketsRange,
getDerived().getBaseEntity());
QualType Types[] = {
SemaRef.Context.UnsignedCharTy, SemaRef.Context.UnsignedShortTy,
SemaRef.Context.UnsignedIntTy, SemaRef.Context.UnsignedLongTy,
SemaRef.Context.UnsignedLongLongTy, SemaRef.Context.UnsignedInt128Ty
};
const unsigned NumTypes = llvm::array_lengthof(Types);
QualType SizeType;
for (unsigned I = 0; I != NumTypes; ++I)
if (Size->getBitWidth() == SemaRef.Context.getIntWidth(Types[I])) {
SizeType = Types[I];
break;
}
// Note that we can return a VariableArrayType here in the case where
// the element type was a dependent VariableArrayType.
IntegerLiteral *ArraySize
= IntegerLiteral::Create(SemaRef.Context, *Size, SizeType,
/*FIXME*/BracketsRange.getBegin());
return SemaRef.BuildArrayType(ElementType, SizeMod, ArraySize,
IndexTypeQuals, BracketsRange,
getDerived().getBaseEntity());
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildConstantArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
const llvm::APInt &Size,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, &Size, SizeExpr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildIncompleteArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr, nullptr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildVariableArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr,
SizeExpr,
IndexTypeQuals, BracketsRange);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildDependentSizedArrayType(QualType ElementType,
ArrayType::ArraySizeModifier SizeMod,
Expr *SizeExpr,
unsigned IndexTypeQuals,
SourceRange BracketsRange) {
return getDerived().RebuildArrayType(ElementType, SizeMod, nullptr,
SizeExpr,
IndexTypeQuals, BracketsRange);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentAddressSpaceType(
QualType PointeeType, Expr *AddrSpaceExpr, SourceLocation AttributeLoc) {
return SemaRef.BuildAddressSpaceAttr(PointeeType, AddrSpaceExpr,
AttributeLoc);
}
template <typename Derived>
QualType
TreeTransform<Derived>::RebuildVectorType(QualType ElementType,
unsigned NumElements,
VectorType::VectorKind VecKind) {
// FIXME: semantic checking!
return SemaRef.Context.getVectorType(ElementType, NumElements, VecKind);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentVectorType(
QualType ElementType, Expr *SizeExpr, SourceLocation AttributeLoc,
VectorType::VectorKind VecKind) {
return SemaRef.BuildVectorType(ElementType, SizeExpr, AttributeLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildExtVectorType(QualType ElementType,
unsigned NumElements,
SourceLocation AttributeLoc) {
llvm::APInt numElements(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy),
NumElements, true);
IntegerLiteral *VectorSize
= IntegerLiteral::Create(SemaRef.Context, numElements, SemaRef.Context.IntTy,
AttributeLoc);
return SemaRef.BuildExtVectorType(ElementType, VectorSize, AttributeLoc);
}
template<typename Derived>
QualType
TreeTransform<Derived>::RebuildDependentSizedExtVectorType(QualType ElementType,
Expr *SizeExpr,
SourceLocation AttributeLoc) {
return SemaRef.BuildExtVectorType(ElementType, SizeExpr, AttributeLoc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildConstantMatrixType(
QualType ElementType, unsigned NumRows, unsigned NumColumns) {
return SemaRef.Context.getConstantMatrixType(ElementType, NumRows,
NumColumns);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentSizedMatrixType(
QualType ElementType, Expr *RowExpr, Expr *ColumnExpr,
SourceLocation AttributeLoc) {
return SemaRef.BuildMatrixType(ElementType, RowExpr, ColumnExpr,
AttributeLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildFunctionProtoType(
QualType T,
MutableArrayRef<QualType> ParamTypes,
const FunctionProtoType::ExtProtoInfo &EPI) {
return SemaRef.BuildFunctionType(T, ParamTypes,
getDerived().getBaseLocation(),
getDerived().getBaseEntity(),
EPI);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildFunctionNoProtoType(QualType T) {
return SemaRef.Context.getFunctionNoProtoType(T);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildUnresolvedUsingType(SourceLocation Loc,
Decl *D) {
assert(D && "no decl found");
if (D->isInvalidDecl()) return QualType();
// FIXME: Doesn't account for ObjCInterfaceDecl!
TypeDecl *Ty;
if (auto *UPD = dyn_cast<UsingPackDecl>(D)) {
// A valid resolved using typename pack expansion decl can have multiple
// UsingDecls, but they must each have exactly one type, and it must be
// the same type in every case. But we must have at least one expansion!
if (UPD->expansions().empty()) {
getSema().Diag(Loc, diag::err_using_pack_expansion_empty)
<< UPD->isCXXClassMember() << UPD;
return QualType();
}
// We might still have some unresolved types. Try to pick a resolved type
// if we can. The final instantiation will check that the remaining
// unresolved types instantiate to the type we pick.
QualType FallbackT;
QualType T;
for (auto *E : UPD->expansions()) {
QualType ThisT = RebuildUnresolvedUsingType(Loc, E);
if (ThisT.isNull())
continue;
else if (ThisT->getAs<UnresolvedUsingType>())
FallbackT = ThisT;
else if (T.isNull())
T = ThisT;
else
assert(getSema().Context.hasSameType(ThisT, T) &&
"mismatched resolved types in using pack expansion");
}
return T.isNull() ? FallbackT : T;
} else if (auto *Using = dyn_cast<UsingDecl>(D)) {
assert(Using->hasTypename() &&
"UnresolvedUsingTypenameDecl transformed to non-typename using");
// A valid resolved using typename decl points to exactly one type decl.
assert(++Using->shadow_begin() == Using->shadow_end());
NamedDecl *Target = Using->shadow_begin()->getTargetDecl();
if (SemaRef.DiagnoseUseOfDecl(Target, Loc))
return QualType();
Ty = cast<TypeDecl>(Target);
} else {
assert(isa<UnresolvedUsingTypenameDecl>(D) &&
"UnresolvedUsingTypenameDecl transformed to non-using decl");
Ty = cast<UnresolvedUsingTypenameDecl>(D);
}
return SemaRef.Context.getTypeDeclType(Ty);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTypeOfExprType(Expr *E,
SourceLocation Loc) {
return SemaRef.BuildTypeofExprType(E, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTypeOfType(QualType Underlying) {
return SemaRef.Context.getTypeOfType(Underlying);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildDecltypeType(Expr *E,
SourceLocation Loc) {
return SemaRef.BuildDecltypeType(E, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc) {
return SemaRef.BuildUnaryTransformType(BaseType, UKind, Loc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildTemplateSpecializationType(
TemplateName Template,
SourceLocation TemplateNameLoc,
TemplateArgumentListInfo &TemplateArgs) {
return SemaRef.CheckTemplateIdType(Template, TemplateNameLoc, TemplateArgs);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildAtomicType(QualType ValueType,
SourceLocation KWLoc) {
return SemaRef.BuildAtomicType(ValueType, KWLoc);
}
template<typename Derived>
QualType TreeTransform<Derived>::RebuildPipeType(QualType ValueType,
SourceLocation KWLoc,
bool isReadPipe) {
return isReadPipe ? SemaRef.BuildReadPipeType(ValueType, KWLoc)
: SemaRef.BuildWritePipeType(ValueType, KWLoc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildExtIntType(bool IsUnsigned,
unsigned NumBits,
SourceLocation Loc) {
llvm::APInt NumBitsAP(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy),
NumBits, true);
IntegerLiteral *Bits = IntegerLiteral::Create(SemaRef.Context, NumBitsAP,
SemaRef.Context.IntTy, Loc);
return SemaRef.BuildExtIntType(IsUnsigned, Bits, Loc);
}
template <typename Derived>
QualType TreeTransform<Derived>::RebuildDependentExtIntType(
bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc) {
return SemaRef.BuildExtIntType(IsUnsigned, NumBitsExpr, Loc);
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
bool TemplateKW,
TemplateDecl *Template) {
return SemaRef.Context.getQualifiedTemplateName(SS.getScopeRep(), TemplateKW,
Template);
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
const IdentifierInfo &Name,
SourceLocation NameLoc,
QualType ObjectType,
NamedDecl *FirstQualifierInScope,
bool AllowInjectedClassName) {
UnqualifiedId TemplateName;
TemplateName.setIdentifier(&Name, NameLoc);
Sema::TemplateTy Template;
getSema().ActOnTemplateName(/*Scope=*/nullptr, SS, TemplateKWLoc,
TemplateName, ParsedType::make(ObjectType),
/*EnteringContext=*/false, Template,
AllowInjectedClassName);
return Template.get();
}
template<typename Derived>
TemplateName
TreeTransform<Derived>::RebuildTemplateName(CXXScopeSpec &SS,
SourceLocation TemplateKWLoc,
OverloadedOperatorKind Operator,
SourceLocation NameLoc,
QualType ObjectType,
bool AllowInjectedClassName) {
UnqualifiedId Name;
// FIXME: Bogus location information.
SourceLocation SymbolLocations[3] = { NameLoc, NameLoc, NameLoc };
Name.setOperatorFunctionId(NameLoc, Operator, SymbolLocations);
Sema::TemplateTy Template;
getSema().ActOnTemplateName(
/*Scope=*/nullptr, SS, TemplateKWLoc, Name, ParsedType::make(ObjectType),
/*EnteringContext=*/false, Template, AllowInjectedClassName);
return Template.get();
}
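// RebuildCXXOperatorCallExpr decides between a builtin operation and an
// overloaded call: if no operand has an overloadable type (broadly, class or
// enumeration types), the corresponding builtin expression is created
// directly; otherwise the candidate set is rebuilt and overload resolution is
// redone.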
template<typename Derived>
ExprResult
TreeTransform<Derived>::RebuildCXXOperatorCallExpr(OverloadedOperatorKind Op,
SourceLocation OpLoc,
Expr *OrigCallee,
Expr *First,
Expr *Second) {
Expr *Callee = OrigCallee->IgnoreParenCasts();
bool isPostIncDec = Second && (Op == OO_PlusPlus || Op == OO_MinusMinus);
if (First->getObjectKind() == OK_ObjCProperty) {
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
if (BinaryOperator::isAssignmentOp(Opc))
return SemaRef.checkPseudoObjectAssignment(/*Scope=*/nullptr, OpLoc, Opc,
First, Second);
ExprResult Result = SemaRef.CheckPlaceholderExpr(First);
if (Result.isInvalid())
return ExprError();
First = Result.get();
}
if (Second && Second->getObjectKind() == OK_ObjCProperty) {
ExprResult Result = SemaRef.CheckPlaceholderExpr(Second);
if (Result.isInvalid())
return ExprError();
Second = Result.get();
}
// Determine whether this should be a builtin operation.
if (Op == OO_Subscript) {
if (!First->getType()->isOverloadableType() &&
!Second->getType()->isOverloadableType())
return getSema().CreateBuiltinArraySubscriptExpr(
First, Callee->getBeginLoc(), Second, OpLoc);
} else if (Op == OO_Arrow) {
// -> is never a builtin operation.
return SemaRef.BuildOverloadedArrowExpr(nullptr, First, OpLoc);
} else if (Second == nullptr || isPostIncDec) {
if (!First->getType()->isOverloadableType() ||
(Op == OO_Amp && getSema().isQualifiedMemberAccess(First))) {
// The argument is not of overloadable type, or this is an expression
// of the form &Class::member, so try to create a built-in unary
// operation.
UnaryOperatorKind Opc
= UnaryOperator::getOverloadedOpcode(Op, isPostIncDec);
return getSema().CreateBuiltinUnaryOp(OpLoc, Opc, First);
}
} else {
if (!First->getType()->isOverloadableType() &&
!Second->getType()->isOverloadableType()) {
// Neither of the arguments is an overloadable type, so try to
// create a built-in binary operation.
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
ExprResult Result
= SemaRef.CreateBuiltinBinOp(OpLoc, Opc, First, Second);
if (Result.isInvalid())
return ExprError();
return Result;
}
}
// Compute the transformed set of functions (and function templates) to be
// used during overload resolution.
UnresolvedSet<16> Functions;
bool RequiresADL;
if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(Callee)) {
Functions.append(ULE->decls_begin(), ULE->decls_end());
// If the overload could not be resolved in the template definition
// (because we had a dependent argument), ADL is performed as part of
// template instantiation.
RequiresADL = ULE->requiresADL();
} else {
// If we've resolved this to a particular non-member function, just call
// that function. If we resolved it to a member function,
// CreateOverloaded* will find that function for us.
NamedDecl *ND = cast<DeclRefExpr>(Callee)->getDecl();
if (!isa<CXXMethodDecl>(ND))
Functions.addDecl(ND);
RequiresADL = false;
}
// Build the argument list; argument-dependent lookup, when required, is
// performed by the CreateOverloaded* calls below.
Expr *Args[2] = { First, Second };
unsigned NumArgs = 1 + (Second != nullptr);
// Create the overloaded operator invocation for unary operators.
if (NumArgs == 1 || isPostIncDec) {
UnaryOperatorKind Opc
= UnaryOperator::getOverloadedOpcode(Op, isPostIncDec);
return SemaRef.CreateOverloadedUnaryOp(OpLoc, Opc, Functions, First,
RequiresADL);
}
if (Op == OO_Subscript) {
SourceLocation LBrace;
SourceLocation RBrace;
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Callee)) {
DeclarationNameLoc NameLoc = DRE->getNameInfo().getInfo();
LBrace = NameLoc.getCXXOperatorNameBeginLoc();
RBrace = NameLoc.getCXXOperatorNameEndLoc();
} else {
LBrace = Callee->getBeginLoc();
RBrace = OpLoc;
}
return SemaRef.CreateOverloadedArraySubscriptExpr(LBrace, RBrace,
First, Second);
}
// Create the overloaded operator invocation for binary operators.
BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
ExprResult Result = SemaRef.CreateOverloadedBinOp(
OpLoc, Opc, Functions, Args[0], Args[1], RequiresADL);
if (Result.isInvalid())
return ExprError();
return Result;
}
template<typename Derived>
ExprResult
TreeTransform<Derived>::RebuildCXXPseudoDestructorExpr(Expr *Base,
SourceLocation OperatorLoc,
bool isArrow,
CXXScopeSpec &SS,
TypeSourceInfo *ScopeType,
SourceLocation CCLoc,
SourceLocation TildeLoc,
PseudoDestructorTypeStorage Destroyed) {
QualType BaseType = Base->getType();
if (Base->isTypeDependent() || Destroyed.getIdentifier() ||
(!isArrow && !BaseType->getAs<RecordType>()) ||
(isArrow && BaseType->getAs<PointerType>() &&
!BaseType->castAs<PointerType>()->getPointeeType()
->template getAs<RecordType>())){
// This pseudo-destructor expression is still a pseudo-destructor.
return SemaRef.BuildPseudoDestructorExpr(
Base, OperatorLoc, isArrow ? tok::arrow : tok::period, SS, ScopeType,
CCLoc, TildeLoc, Destroyed);
}
TypeSourceInfo *DestroyedType = Destroyed.getTypeSourceInfo();
DeclarationName Name(SemaRef.Context.DeclarationNames.getCXXDestructorName(
SemaRef.Context.getCanonicalType(DestroyedType->getType())));
DeclarationNameInfo NameInfo(Name, Destroyed.getLocation());
NameInfo.setNamedTypeInfo(DestroyedType);
// The scope type is now known to be a valid nested name specifier
// component. Tack it on to the end of the nested name specifier.
if (ScopeType) {
if (!ScopeType->getType()->getAs<TagType>()) {
getSema().Diag(ScopeType->getTypeLoc().getBeginLoc(),
diag::err_expected_class_or_namespace)
<< ScopeType->getType() << getSema().getLangOpts().CPlusPlus;
return ExprError();
}
SS.Extend(SemaRef.Context, SourceLocation(), ScopeType->getTypeLoc(),
CCLoc);
}
SourceLocation TemplateKWLoc; // FIXME: retrieve it from caller.
return getSema().BuildMemberReferenceExpr(Base, BaseType,
OperatorLoc, isArrow,
SS, TemplateKWLoc,
/*FIXME: FirstQualifier*/ nullptr,
NameInfo,
/*TemplateArgs*/ nullptr,
/*S*/nullptr);
}
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformCapturedStmt(CapturedStmt *S) {
SourceLocation Loc = S->getBeginLoc();
CapturedDecl *CD = S->getCapturedDecl();
unsigned NumParams = CD->getNumParams();
unsigned ContextParamPos = CD->getContextParamPosition();
SmallVector<Sema::CapturedParamNameType, 4> Params;
for (unsigned I = 0; I < NumParams; ++I) {
if (I != ContextParamPos) {
Params.push_back(
std::make_pair(
CD->getParam(I)->getName(),
getDerived().TransformType(CD->getParam(I)->getType())));
} else {
Params.push_back(std::make_pair(StringRef(), QualType()));
}
}
getSema().ActOnCapturedRegionStart(Loc, /*CurScope*/nullptr,
S->getCapturedRegionKind(), Params);
StmtResult Body;
{
Sema::CompoundScopeRAII CompoundScope(getSema());
Body = getDerived().TransformStmt(S->getCapturedStmt());
}
if (Body.isInvalid()) {
getSema().ActOnCapturedRegionError();
return StmtError();
}
return getSema().ActOnCapturedRegionEnd(Body.get());
}
} // end namespace clang
#endif // LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
index 83bade9941b3..1722572f1a27 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReader.cpp
@@ -1,13023 +1,13025 @@
//===- ASTReader.cpp - AST File Reader ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ASTReader class, which reads AST files.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Serialization/ASTRecordReader.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/AbstractTypeReader.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTUnresolvedSet.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclGroup.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/ODRHash.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/SourceManagerInternals.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Basic/Version.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/Token.h"
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTDeserializationListener.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/ModuleFileExtension.h"
#include "clang/Serialization/ModuleManager.h"
#include "clang/Serialization/PCHContainerOperations.h"
#include "clang/Serialization/SerializationDiagnostic.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>
using namespace clang;
using namespace clang::serialization;
using namespace clang::serialization::reader;
using llvm::BitstreamCursor;
using llvm::RoundingMode;
//===----------------------------------------------------------------------===//
// ChainedASTReaderListener implementation
//===----------------------------------------------------------------------===//
bool
ChainedASTReaderListener::ReadFullVersionInformation(StringRef FullVersion) {
return First->ReadFullVersionInformation(FullVersion) ||
Second->ReadFullVersionInformation(FullVersion);
}
void ChainedASTReaderListener::ReadModuleName(StringRef ModuleName) {
First->ReadModuleName(ModuleName);
Second->ReadModuleName(ModuleName);
}
void ChainedASTReaderListener::ReadModuleMapFile(StringRef ModuleMapPath) {
First->ReadModuleMapFile(ModuleMapPath);
Second->ReadModuleMapFile(ModuleMapPath);
}
bool
ChainedASTReaderListener::ReadLanguageOptions(const LangOptions &LangOpts,
bool Complain,
bool AllowCompatibleDifferences) {
return First->ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences) ||
Second->ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences);
}
bool ChainedASTReaderListener::ReadTargetOptions(
const TargetOptions &TargetOpts, bool Complain,
bool AllowCompatibleDifferences) {
return First->ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences) ||
Second->ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences);
}
bool ChainedASTReaderListener::ReadDiagnosticOptions(
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, bool Complain) {
return First->ReadDiagnosticOptions(DiagOpts, Complain) ||
Second->ReadDiagnosticOptions(DiagOpts, Complain);
}
bool
ChainedASTReaderListener::ReadFileSystemOptions(const FileSystemOptions &FSOpts,
bool Complain) {
return First->ReadFileSystemOptions(FSOpts, Complain) ||
Second->ReadFileSystemOptions(FSOpts, Complain);
}
bool ChainedASTReaderListener::ReadHeaderSearchOptions(
const HeaderSearchOptions &HSOpts, StringRef SpecificModuleCachePath,
bool Complain) {
return First->ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain) ||
Second->ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain);
}
bool ChainedASTReaderListener::ReadPreprocessorOptions(
const PreprocessorOptions &PPOpts, bool Complain,
std::string &SuggestedPredefines) {
return First->ReadPreprocessorOptions(PPOpts, Complain,
SuggestedPredefines) ||
Second->ReadPreprocessorOptions(PPOpts, Complain, SuggestedPredefines);
}
void ChainedASTReaderListener::ReadCounter(const serialization::ModuleFile &M,
unsigned Value) {
First->ReadCounter(M, Value);
Second->ReadCounter(M, Value);
}
bool ChainedASTReaderListener::needsInputFileVisitation() {
return First->needsInputFileVisitation() ||
Second->needsInputFileVisitation();
}
bool ChainedASTReaderListener::needsSystemInputFileVisitation() {
return First->needsSystemInputFileVisitation() ||
Second->needsSystemInputFileVisitation();
}
void ChainedASTReaderListener::visitModuleFile(StringRef Filename,
ModuleKind Kind) {
First->visitModuleFile(Filename, Kind);
Second->visitModuleFile(Filename, Kind);
}
bool ChainedASTReaderListener::visitInputFile(StringRef Filename,
bool isSystem,
bool isOverridden,
bool isExplicitModule) {
bool Continue = false;
if (First->needsInputFileVisitation() &&
(!isSystem || First->needsSystemInputFileVisitation()))
Continue |= First->visitInputFile(Filename, isSystem, isOverridden,
isExplicitModule);
if (Second->needsInputFileVisitation() &&
(!isSystem || Second->needsSystemInputFileVisitation()))
Continue |= Second->visitInputFile(Filename, isSystem, isOverridden,
isExplicitModule);
return Continue;
}
void ChainedASTReaderListener::readModuleFileExtension(
const ModuleFileExtensionMetadata &Metadata) {
First->readModuleFileExtension(Metadata);
Second->readModuleFileExtension(Metadata);
}
//===----------------------------------------------------------------------===//
// PCH validator implementation
//===----------------------------------------------------------------------===//
ASTReaderListener::~ASTReaderListener() = default;
/// Compare the given set of language options against an existing set of
/// language options.
///
/// \param Diags If non-NULL, diagnostics will be emitted via this engine.
/// \param AllowCompatibleDifferences If true, differences between compatible
/// language options will be permitted.
///
/// \returns true if the language options mismatch, false otherwise.
static bool checkLanguageOptions(const LangOptions &LangOpts,
const LangOptions &ExistingLangOpts,
DiagnosticsEngine *Diags,
bool AllowCompatibleDifferences = true) {
#define LANGOPT(Name, Bits, Default, Description) \
if (ExistingLangOpts.Name != LangOpts.Name) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_mismatch) \
<< Description << LangOpts.Name << ExistingLangOpts.Name; \
return true; \
}
#define VALUE_LANGOPT(Name, Bits, Default, Description) \
if (ExistingLangOpts.Name != LangOpts.Name) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_value_mismatch) \
<< Description; \
return true; \
}
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
if (ExistingLangOpts.get##Name() != LangOpts.get##Name()) { \
if (Diags) \
Diags->Report(diag::err_pch_langopt_value_mismatch) \
<< Description; \
return true; \
}
#define COMPATIBLE_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
LANGOPT(Name, Bits, Default, Description)
#define COMPATIBLE_ENUM_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
ENUM_LANGOPT(Name, Bits, Default, Description)
#define COMPATIBLE_VALUE_LANGOPT(Name, Bits, Default, Description) \
if (!AllowCompatibleDifferences) \
VALUE_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
#define BENIGN_VALUE_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"
if (ExistingLangOpts.ModuleFeatures != LangOpts.ModuleFeatures) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch) << "module features";
return true;
}
if (ExistingLangOpts.ObjCRuntime != LangOpts.ObjCRuntime) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch)
<< "target Objective-C runtime";
return true;
}
if (ExistingLangOpts.CommentOpts.BlockCommandNames !=
LangOpts.CommentOpts.BlockCommandNames) {
if (Diags)
Diags->Report(diag::err_pch_langopt_value_mismatch)
<< "block command names";
return true;
}
// Sanitizer feature mismatches are treated as compatible differences. If
// compatible differences aren't allowed, we still only want to check for
// mismatches of non-modular sanitizers (the only ones which can affect AST
// generation).
if (!AllowCompatibleDifferences) {
SanitizerMask ModularSanitizers = getPPTransparentSanitizers();
SanitizerSet ExistingSanitizers = ExistingLangOpts.Sanitize;
SanitizerSet ImportedSanitizers = LangOpts.Sanitize;
ExistingSanitizers.clear(ModularSanitizers);
ImportedSanitizers.clear(ModularSanitizers);
if (ExistingSanitizers.Mask != ImportedSanitizers.Mask) {
const std::string Flag = "-fsanitize=";
if (Diags) {
#define SANITIZER(NAME, ID) \
{ \
bool InExistingModule = ExistingSanitizers.has(SanitizerKind::ID); \
bool InImportedModule = ImportedSanitizers.has(SanitizerKind::ID); \
if (InExistingModule != InImportedModule) \
Diags->Report(diag::err_pch_targetopt_feature_mismatch) \
<< InExistingModule << (Flag + NAME); \
}
#include "clang/Basic/Sanitizers.def"
}
return true;
}
}
return false;
}
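// Illustrative sketch, not from the patch above: the X-macro pattern that
// checkLanguageOptions relies on, reduced to a hypothetical two-option list.
// In Clang the option list lives in LangOptions.def and is #included at each
// expansion site; here it is inlined so the sketch stays self-contained.
#include <cstdio>

struct Options {
#define OPTION(Name, Default) int Name = Default;
  OPTION(OptimizeSize, 0)
  OPTION(Exceptions, 1)
#undef OPTION
};

static bool anyMismatch(const Options &A, const Options &B) {
  // The same list, expanded a second time, now generates one comparison and
  // one diagnostic per option.
#define OPTION(Name, Default) \
  if (A.Name != B.Name) { std::printf("option %s differs\n", #Name); return true; }
  OPTION(OptimizeSize, 0)
  OPTION(Exceptions, 1)
#undef OPTION
  return false;
}

int main() {
  Options A, B;
  B.Exceptions = 0;
  return anyMismatch(A, B) ? 0 : 1; // prints "option Exceptions differs"
}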
/// Compare the given set of target options against an existing set of
/// target options.
///
/// \param Diags If non-NULL, diagnostics will be emitted via this engine.
///
/// \returns true if the target options mismatch, false otherwise.
static bool checkTargetOptions(const TargetOptions &TargetOpts,
const TargetOptions &ExistingTargetOpts,
DiagnosticsEngine *Diags,
bool AllowCompatibleDifferences = true) {
#define CHECK_TARGET_OPT(Field, Name) \
if (TargetOpts.Field != ExistingTargetOpts.Field) { \
if (Diags) \
Diags->Report(diag::err_pch_targetopt_mismatch) \
<< Name << TargetOpts.Field << ExistingTargetOpts.Field; \
return true; \
}
// The triple and ABI must match exactly.
CHECK_TARGET_OPT(Triple, "target");
CHECK_TARGET_OPT(ABI, "target ABI");
// We can tolerate different CPUs in many cases, notably when one CPU
// supports a strict superset of another. When allowing compatible
// differences skip this check.
if (!AllowCompatibleDifferences) {
CHECK_TARGET_OPT(CPU, "target CPU");
CHECK_TARGET_OPT(TuneCPU, "tune CPU");
}
#undef CHECK_TARGET_OPT
// Compare feature sets.
SmallVector<StringRef, 4> ExistingFeatures(
ExistingTargetOpts.FeaturesAsWritten.begin(),
ExistingTargetOpts.FeaturesAsWritten.end());
SmallVector<StringRef, 4> ReadFeatures(TargetOpts.FeaturesAsWritten.begin(),
TargetOpts.FeaturesAsWritten.end());
llvm::sort(ExistingFeatures);
llvm::sort(ReadFeatures);
// We compute the set difference in both directions explicitly so that we can
// diagnose the differences differently.
SmallVector<StringRef, 4> UnmatchedExistingFeatures, UnmatchedReadFeatures;
std::set_difference(
ExistingFeatures.begin(), ExistingFeatures.end(), ReadFeatures.begin(),
ReadFeatures.end(), std::back_inserter(UnmatchedExistingFeatures));
std::set_difference(ReadFeatures.begin(), ReadFeatures.end(),
ExistingFeatures.begin(), ExistingFeatures.end(),
std::back_inserter(UnmatchedReadFeatures));
// If we are allowing compatible differences and the read feature set is
// a strict subset of the existing feature set, there is nothing to diagnose.
if (AllowCompatibleDifferences && UnmatchedReadFeatures.empty())
return false;
if (Diags) {
for (StringRef Feature : UnmatchedReadFeatures)
Diags->Report(diag::err_pch_targetopt_feature_mismatch)
<< /* is-existing-feature */ false << Feature;
for (StringRef Feature : UnmatchedExistingFeatures)
Diags->Report(diag::err_pch_targetopt_feature_mismatch)
<< /* is-existing-feature */ true << Feature;
}
return !UnmatchedReadFeatures.empty() || !UnmatchedExistingFeatures.empty();
}
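// Illustrative sketch, not from the patch above: the two-way set-difference
// idiom checkTargetOptions uses, applied to plain strings. Both ranges must be
// sorted before std::set_difference; each direction yields the entries missing
// from the other side, so the caller can word the two diagnostics differently.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Existing = {"+avx", "+sse4.2"};
  std::vector<std::string> Read = {"+neon", "+sse4.2"};
  std::sort(Existing.begin(), Existing.end());
  std::sort(Read.begin(), Read.end());

  std::vector<std::string> OnlyExisting, OnlyRead;
  std::set_difference(Existing.begin(), Existing.end(), Read.begin(),
                      Read.end(), std::back_inserter(OnlyExisting));
  std::set_difference(Read.begin(), Read.end(), Existing.begin(),
                      Existing.end(), std::back_inserter(OnlyRead));

  for (const std::string &F : OnlyExisting)
    std::cout << "only in the existing target: " << F << "\n"; // +avx
  for (const std::string &F : OnlyRead)
    std::cout << "only in the AST file: " << F << "\n";        // +neon
}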
bool
PCHValidator::ReadLanguageOptions(const LangOptions &LangOpts,
bool Complain,
bool AllowCompatibleDifferences) {
const LangOptions &ExistingLangOpts = PP.getLangOpts();
return checkLanguageOptions(LangOpts, ExistingLangOpts,
Complain ? &Reader.Diags : nullptr,
AllowCompatibleDifferences);
}
bool PCHValidator::ReadTargetOptions(const TargetOptions &TargetOpts,
bool Complain,
bool AllowCompatibleDifferences) {
const TargetOptions &ExistingTargetOpts = PP.getTargetInfo().getTargetOpts();
return checkTargetOptions(TargetOpts, ExistingTargetOpts,
Complain ? &Reader.Diags : nullptr,
AllowCompatibleDifferences);
}
namespace {
using MacroDefinitionsMap =
llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>;
using DeclsMap = llvm::DenseMap<DeclarationName, SmallVector<NamedDecl *, 8>>;
} // namespace
static bool checkDiagnosticGroupMappings(DiagnosticsEngine &StoredDiags,
DiagnosticsEngine &Diags,
bool Complain) {
using Level = DiagnosticsEngine::Level;
// Check current mappings for new -Werror mappings, and the stored mappings
// for cases that were explicitly mapped to *not* be errors that are now
// errors because of options like -Werror.
DiagnosticsEngine *MappingSources[] = { &Diags, &StoredDiags };
for (DiagnosticsEngine *MappingSource : MappingSources) {
for (auto DiagIDMappingPair : MappingSource->getDiagnosticMappings()) {
diag::kind DiagID = DiagIDMappingPair.first;
Level CurLevel = Diags.getDiagnosticLevel(DiagID, SourceLocation());
if (CurLevel < DiagnosticsEngine::Error)
continue; // not significant
Level StoredLevel =
StoredDiags.getDiagnosticLevel(DiagID, SourceLocation());
if (StoredLevel < DiagnosticsEngine::Error) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Werror=" +
Diags.getDiagnosticIDs()->getWarningOptionForDiag(DiagID).str();
return true;
}
}
}
return false;
}
static bool isExtHandlingFromDiagsError(DiagnosticsEngine &Diags) {
diag::Severity Ext = Diags.getExtensionHandlingBehavior();
if (Ext == diag::Severity::Warning && Diags.getWarningsAsErrors())
return true;
return Ext >= diag::Severity::Error;
}
static bool checkDiagnosticMappings(DiagnosticsEngine &StoredDiags,
DiagnosticsEngine &Diags,
bool IsSystem, bool Complain) {
// Top-level options
if (IsSystem) {
if (Diags.getSuppressSystemWarnings())
return false;
// If -Wsystem-headers was not enabled before, be conservative
if (StoredDiags.getSuppressSystemWarnings()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Wsystem-headers";
return true;
}
}
if (Diags.getWarningsAsErrors() && !StoredDiags.getWarningsAsErrors()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Werror";
return true;
}
if (Diags.getWarningsAsErrors() && Diags.getEnableAllWarnings() &&
!StoredDiags.getEnableAllWarnings()) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-Weverything -Werror";
return true;
}
if (isExtHandlingFromDiagsError(Diags) &&
!isExtHandlingFromDiagsError(StoredDiags)) {
if (Complain)
Diags.Report(diag::err_pch_diagopt_mismatch) << "-pedantic-errors";
return true;
}
return checkDiagnosticGroupMappings(StoredDiags, Diags, Complain);
}
/// Return the top import module if it is implicit, nullptr otherwise.
static Module *getTopImportImplicitModule(ModuleManager &ModuleMgr,
Preprocessor &PP) {
// If the original import came from a file explicitly generated by the user,
// don't check the diagnostic mappings.
// FIXME: currently this is approximated by checking whether this is not a
// module import of an implicitly-loaded module file.
// Note: ModuleMgr.rbegin() may not be the current module, but it must be in
// the transitive closure of its imports, since unrelated modules cannot be
// imported until after this module finishes validation.
ModuleFile *TopImport = &*ModuleMgr.rbegin();
while (!TopImport->ImportedBy.empty())
TopImport = TopImport->ImportedBy[0];
if (TopImport->Kind != MK_ImplicitModule)
return nullptr;
StringRef ModuleName = TopImport->ModuleName;
assert(!ModuleName.empty() && "diagnostic options read before module name");
Module *M = PP.getHeaderSearchInfo().lookupModule(ModuleName);
assert(M && "missing module");
return M;
}
bool PCHValidator::ReadDiagnosticOptions(
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, bool Complain) {
DiagnosticsEngine &ExistingDiags = PP.getDiagnostics();
IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(ExistingDiags.getDiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
new DiagnosticsEngine(DiagIDs, DiagOpts.get()));
// This should never fail, because we would have processed these options
// before writing them to an ASTFile.
ProcessWarningOptions(*Diags, *DiagOpts, /*Report*/false);
ModuleManager &ModuleMgr = Reader.getModuleManager();
assert(ModuleMgr.size() >= 1 && "what ASTFile is this then");
Module *TopM = getTopImportImplicitModule(ModuleMgr, PP);
if (!TopM)
return false;
// FIXME: if the diagnostics are incompatible, save a DiagnosticOptions that
// contains the union of their flags.
return checkDiagnosticMappings(*Diags, ExistingDiags, TopM->IsSystem,
Complain);
}
/// Collect the macro definitions provided by the given preprocessor
/// options.
static void
collectMacroDefinitions(const PreprocessorOptions &PPOpts,
MacroDefinitionsMap &Macros,
SmallVectorImpl<StringRef> *MacroNames = nullptr) {
for (unsigned I = 0, N = PPOpts.Macros.size(); I != N; ++I) {
StringRef Macro = PPOpts.Macros[I].first;
bool IsUndef = PPOpts.Macros[I].second;
std::pair<StringRef, StringRef> MacroPair = Macro.split('=');
StringRef MacroName = MacroPair.first;
StringRef MacroBody = MacroPair.second;
// For an #undef'd macro, we only care about the name.
if (IsUndef) {
if (MacroNames && !Macros.count(MacroName))
MacroNames->push_back(MacroName);
Macros[MacroName] = std::make_pair("", true);
continue;
}
// For a #define'd macro, figure out the actual definition.
if (MacroName.size() == Macro.size())
MacroBody = "1";
else {
// Note: GCC drops anything following an end-of-line character.
StringRef::size_type End = MacroBody.find_first_of("\n\r");
MacroBody = MacroBody.substr(0, End);
}
if (MacroNames && !Macros.count(MacroName))
MacroNames->push_back(MacroName);
Macros[MacroName] = std::make_pair(MacroBody, false);
}
}
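// Illustrative sketch, not from the patch above: the macro-splitting rule
// applied by collectMacroDefinitions, restated with std::string instead of
// StringRef. A "-D"-style entry without '=' behaves like NAME=1, and anything
// after the first end-of-line character in the body is dropped (the GCC
// behaviour noted in the comment above).
#include <iostream>
#include <string>
#include <utility>

static std::pair<std::string, std::string> splitMacro(const std::string &Entry) {
  std::string::size_type Eq = Entry.find('=');
  if (Eq == std::string::npos)
    return {Entry, "1"};                             // "NDEBUG" -> NDEBUG 1
  std::string Name = Entry.substr(0, Eq);
  std::string Body = Entry.substr(Eq + 1);
  Body = Body.substr(0, Body.find_first_of("\n\r")); // drop trailing lines
  return {Name, Body};
}

int main() {
  auto P = splitMacro("DEBUG_LEVEL=2\njunk");
  std::cout << P.first << " " << P.second << "\n";   // DEBUG_LEVEL 2
  std::cout << splitMacro("NDEBUG").second << "\n";  // 1
}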
/// Check the preprocessor options deserialized from the control block
/// against the preprocessor options in an existing preprocessor.
///
/// \param Diags If non-null, produce diagnostics for any mismatches incurred.
/// \param Validate If true, validate preprocessor options. If false, allow
/// macros defined by \p ExistingPPOpts to override those defined by
/// \p PPOpts in SuggestedPredefines.
static bool checkPreprocessorOptions(const PreprocessorOptions &PPOpts,
const PreprocessorOptions &ExistingPPOpts,
DiagnosticsEngine *Diags,
FileManager &FileMgr,
std::string &SuggestedPredefines,
const LangOptions &LangOpts,
bool Validate = true) {
// Check macro definitions.
MacroDefinitionsMap ASTFileMacros;
collectMacroDefinitions(PPOpts, ASTFileMacros);
MacroDefinitionsMap ExistingMacros;
SmallVector<StringRef, 4> ExistingMacroNames;
collectMacroDefinitions(ExistingPPOpts, ExistingMacros, &ExistingMacroNames);
for (unsigned I = 0, N = ExistingMacroNames.size(); I != N; ++I) {
// Dig out the macro definition in the existing preprocessor options.
StringRef MacroName = ExistingMacroNames[I];
std::pair<StringRef, bool> Existing = ExistingMacros[MacroName];
// Check whether we know anything about this macro name or not.
llvm::StringMap<std::pair<StringRef, bool /*IsUndef*/>>::iterator Known =
ASTFileMacros.find(MacroName);
if (!Validate || Known == ASTFileMacros.end()) {
// FIXME: Check whether this identifier was referenced anywhere in the
// AST file. If so, we should reject the AST file. Unfortunately, this
// information isn't in the control block. What shall we do about it?
if (Existing.second) {
SuggestedPredefines += "#undef ";
SuggestedPredefines += MacroName.str();
SuggestedPredefines += '\n';
} else {
SuggestedPredefines += "#define ";
SuggestedPredefines += MacroName.str();
SuggestedPredefines += ' ';
SuggestedPredefines += Existing.first.str();
SuggestedPredefines += '\n';
}
continue;
}
// If the macro was defined in one but undef'd in the other, we have a
// conflict.
if (Existing.second != Known->second.second) {
if (Diags) {
Diags->Report(diag::err_pch_macro_def_undef)
<< MacroName << Known->second.second;
}
return true;
}
// If the macro was #undef'd in both, or if the macro bodies are identical,
// it's fine.
if (Existing.second || Existing.first == Known->second.first)
continue;
// The macro bodies differ; complain.
if (Diags) {
Diags->Report(diag::err_pch_macro_def_conflict)
<< MacroName << Known->second.first << Existing.first;
}
return true;
}
// Check whether we're using predefines.
if (PPOpts.UsePredefines != ExistingPPOpts.UsePredefines && Validate) {
if (Diags) {
Diags->Report(diag::err_pch_undef) << ExistingPPOpts.UsePredefines;
}
return true;
}
// Detailed record is important since it is used for the module cache hash.
if (LangOpts.Modules &&
PPOpts.DetailedRecord != ExistingPPOpts.DetailedRecord && Validate) {
if (Diags) {
Diags->Report(diag::err_pch_pp_detailed_record) << PPOpts.DetailedRecord;
}
return true;
}
// Compute the #include and #include_macros lines we need.
for (unsigned I = 0, N = ExistingPPOpts.Includes.size(); I != N; ++I) {
StringRef File = ExistingPPOpts.Includes[I];
if (!ExistingPPOpts.ImplicitPCHInclude.empty() &&
!ExistingPPOpts.PCHThroughHeader.empty()) {
// In case the through header is an include, we must add all the includes
// to the predefines so the start point can be determined.
SuggestedPredefines += "#include \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n";
continue;
}
if (File == ExistingPPOpts.ImplicitPCHInclude)
continue;
if (std::find(PPOpts.Includes.begin(), PPOpts.Includes.end(), File)
!= PPOpts.Includes.end())
continue;
SuggestedPredefines += "#include \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n";
}
for (unsigned I = 0, N = ExistingPPOpts.MacroIncludes.size(); I != N; ++I) {
StringRef File = ExistingPPOpts.MacroIncludes[I];
if (std::find(PPOpts.MacroIncludes.begin(), PPOpts.MacroIncludes.end(),
File)
!= PPOpts.MacroIncludes.end())
continue;
SuggestedPredefines += "#__include_macros \"";
SuggestedPredefines += File;
SuggestedPredefines += "\"\n##\n";
}
return false;
}
bool PCHValidator::ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) {
const PreprocessorOptions &ExistingPPOpts = PP.getPreprocessorOpts();
return checkPreprocessorOptions(PPOpts, ExistingPPOpts,
Complain? &Reader.Diags : nullptr,
PP.getFileManager(),
SuggestedPredefines,
PP.getLangOpts());
}
bool SimpleASTReaderListener::ReadPreprocessorOptions(
const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) {
return checkPreprocessorOptions(PPOpts,
PP.getPreprocessorOpts(),
nullptr,
PP.getFileManager(),
SuggestedPredefines,
PP.getLangOpts(),
false);
}
/// Check the header search options deserialized from the control block
/// against the header search options in an existing preprocessor.
///
/// \param Diags If non-null, produce diagnostics for any mismatches incurred.
static bool checkHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
StringRef ExistingModuleCachePath,
DiagnosticsEngine *Diags,
const LangOptions &LangOpts,
const PreprocessorOptions &PPOpts) {
if (LangOpts.Modules) {
if (SpecificModuleCachePath != ExistingModuleCachePath &&
!PPOpts.AllowPCHWithDifferentModulesCachePath) {
if (Diags)
Diags->Report(diag::err_pch_modulecache_mismatch)
<< SpecificModuleCachePath << ExistingModuleCachePath;
return true;
}
}
return false;
}
bool PCHValidator::ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
bool Complain) {
return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
PP.getHeaderSearchInfo().getModuleCachePath(),
Complain ? &Reader.Diags : nullptr,
PP.getLangOpts(), PP.getPreprocessorOpts());
}
void PCHValidator::ReadCounter(const ModuleFile &M, unsigned Value) {
PP.setCounterValue(Value);
}
//===----------------------------------------------------------------------===//
// AST reader implementation
//===----------------------------------------------------------------------===//
static uint64_t readULEB(const unsigned char *&P) {
unsigned Length = 0;
const char *Error = nullptr;
uint64_t Val = llvm::decodeULEB128(P, &Length, nullptr, &Error);
if (Error)
llvm::report_fatal_error(Error);
P += Length;
return Val;
}
/// Read ULEB-encoded key length and data length.
static std::pair<unsigned, unsigned>
readULEBKeyDataLength(const unsigned char *&P) {
unsigned KeyLen = readULEB(P);
if ((unsigned)KeyLen != KeyLen)
llvm::report_fatal_error("key too large");
unsigned DataLen = readULEB(P);
if ((unsigned)DataLen != DataLen)
llvm::report_fatal_error("data too large");
return std::make_pair(KeyLen, DataLen);
}
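// Illustrative sketch, not from the patch above: a minimal ULEB128 decoder of
// the kind llvm::decodeULEB128 implements for readULEB. Each byte contributes
// its low seven bits, least-significant group first, and the high bit marks
// "more bytes follow".
#include <cstdint>
#include <iostream>

static uint64_t decodeULEB128Sketch(const unsigned char *&P) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  unsigned char Byte;
  do {
    Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Value;
}

int main() {
  const unsigned char Buf[] = {0xE5, 0x8E, 0x26}; // 624485 in ULEB128
  const unsigned char *P = Buf;
  std::cout << decodeULEB128Sketch(P) << "\n";    // 624485
}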
void ASTReader::setDeserializationListener(ASTDeserializationListener *Listener,
bool TakeOwnership) {
DeserializationListener = Listener;
OwnsDeserializationListener = TakeOwnership;
}
unsigned ASTSelectorLookupTrait::ComputeHash(Selector Sel) {
return serialization::ComputeHash(Sel);
}
std::pair<unsigned, unsigned>
ASTSelectorLookupTrait::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
ASTSelectorLookupTrait::internal_key_type
ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) {
using namespace llvm::support;
SelectorTable &SelTable = Reader.getContext().Selectors;
unsigned N = endian::readNext<uint16_t, little, unaligned>(d);
IdentifierInfo *FirstII = Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d));
if (N == 0)
return SelTable.getNullarySelector(FirstII);
else if (N == 1)
return SelTable.getUnarySelector(FirstII);
SmallVector<IdentifierInfo *, 16> Args;
Args.push_back(FirstII);
for (unsigned I = 1; I != N; ++I)
Args.push_back(Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d)));
return SelTable.getSelector(N, Args.data());
}
ASTSelectorLookupTrait::data_type
ASTSelectorLookupTrait::ReadData(Selector, const unsigned char* d,
unsigned DataLen) {
using namespace llvm::support;
data_type Result;
Result.ID = Reader.getGlobalSelectorID(
F, endian::readNext<uint32_t, little, unaligned>(d));
unsigned FullInstanceBits = endian::readNext<uint16_t, little, unaligned>(d);
unsigned FullFactoryBits = endian::readNext<uint16_t, little, unaligned>(d);
Result.InstanceBits = FullInstanceBits & 0x3;
Result.InstanceHasMoreThanOneDecl = (FullInstanceBits >> 2) & 0x1;
Result.FactoryBits = FullFactoryBits & 0x3;
Result.FactoryHasMoreThanOneDecl = (FullFactoryBits >> 2) & 0x1;
unsigned NumInstanceMethods = FullInstanceBits >> 3;
unsigned NumFactoryMethods = FullFactoryBits >> 3;
// Load instance methods
for (unsigned I = 0; I != NumInstanceMethods; ++I) {
if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs<ObjCMethodDecl>(
F, endian::readNext<uint32_t, little, unaligned>(d)))
Result.Instance.push_back(Method);
}
// Load factory methods
for (unsigned I = 0; I != NumFactoryMethods; ++I) {
if (ObjCMethodDecl *Method = Reader.GetLocalDeclAs<ObjCMethodDecl>(
F, endian::readNext<uint32_t, little, unaligned>(d)))
Result.Factory.push_back(Method);
}
return Result;
}
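// Illustrative sketch, not from the patch above: manual little-endian decoding
// of the kind llvm::support::endian::readNext performs in the lookup traits,
// advancing the cursor past each field. The field layout here is hypothetical,
// not the real on-disk record format.
#include <cstdint>
#include <iostream>

static uint32_t readLE32(const unsigned char *&P) {
  uint32_t V = uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
               uint32_t(P[3]) << 24;
  P += 4;
  return V;
}

static uint16_t readLE16(const unsigned char *&P) {
  uint16_t V = uint16_t(P[0]) | uint16_t(P[1]) << 8;
  P += 2;
  return V;
}

int main() {
  const unsigned char Buf[] = {0x2A, 0x00, 0x00, 0x00, 0x05, 0x00};
  const unsigned char *P = Buf;
  std::cout << readLE32(P) << " " << readLE16(P) << "\n"; // 42 5
}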
unsigned ASTIdentifierLookupTraitBase::ComputeHash(const internal_key_type& a) {
return llvm::djbHash(a);
}
std::pair<unsigned, unsigned>
ASTIdentifierLookupTraitBase::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
ASTIdentifierLookupTraitBase::internal_key_type
ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
assert(n >= 2 && d[n-1] == '\0');
return StringRef((const char*) d, n-1);
}
/// Whether the given identifier is "interesting".
static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II,
bool IsModule) {
return II.hadMacroDefinition() || II.isPoisoned() ||
(!IsModule && II.getObjCOrBuiltinID()) ||
II.hasRevertedTokenIDToIdentifier() ||
(!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) &&
II.getFETokenInfo());
}
static bool readBit(unsigned &Bits) {
bool Value = Bits & 0x1;
Bits >>= 1;
return Value;
}
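// Illustrative sketch, not from the patch above: one way a writer could pack
// several boolean flags into a single integer so that a readBit-style loop
// recovers them. Reading from the low bit returns flags in the reverse of the
// order this particular writer pushed them; the flag names are made up.
#include <cassert>

int main() {
  bool HadMacroDefinition = true, ExtensionToken = false, Poisoned = true;
  unsigned Bits = 0;
  Bits = (Bits << 1) | HadMacroDefinition;
  Bits = (Bits << 1) | ExtensionToken;
  Bits = (Bits << 1) | Poisoned;
  auto ReadBit = [&Bits] { bool V = Bits & 1; Bits >>= 1; return V; };
  assert(ReadBit() == Poisoned);
  assert(ReadBit() == ExtensionToken);
  assert(ReadBit() == HadMacroDefinition);
  assert(Bits == 0 && "no extra bits left over");
  return 0;
}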
IdentID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) {
using namespace llvm::support;
unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
return Reader.getGlobalIdentifierID(F, RawID >> 1);
}
static void markIdentifierFromAST(ASTReader &Reader, IdentifierInfo &II) {
if (!II.isFromAST()) {
II.setIsFromAST();
bool IsModule = Reader.getPreprocessor().getCurrentModule() != nullptr;
if (isInterestingIdentifier(Reader, II, IsModule))
II.setChangedSinceDeserialization();
}
}
IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
const unsigned char* d,
unsigned DataLen) {
using namespace llvm::support;
unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
bool IsInteresting = RawID & 0x01;
// Wipe out the "is interesting" bit.
RawID = RawID >> 1;
// Build the IdentifierInfo and link the identifier ID with it.
IdentifierInfo *II = KnownII;
if (!II) {
II = &Reader.getIdentifierTable().getOwn(k);
KnownII = II;
}
markIdentifierFromAST(Reader, *II);
Reader.markIdentifierUpToDate(II);
IdentID ID = Reader.getGlobalIdentifierID(F, RawID);
if (!IsInteresting) {
// For uninteresting identifiers, there's nothing else to do. Just notify
// the reader that we've finished loading this identifier.
Reader.SetIdentifierInfo(ID, II);
return II;
}
unsigned ObjCOrBuiltinID = endian::readNext<uint16_t, little, unaligned>(d);
unsigned Bits = endian::readNext<uint16_t, little, unaligned>(d);
bool CPlusPlusOperatorKeyword = readBit(Bits);
bool HasRevertedTokenIDToIdentifier = readBit(Bits);
bool Poisoned = readBit(Bits);
bool ExtensionToken = readBit(Bits);
bool HadMacroDefinition = readBit(Bits);
assert(Bits == 0 && "Extra bits in the identifier?");
DataLen -= 8;
// Set or check the various bits in the IdentifierInfo structure.
// Token IDs are read-only.
if (HasRevertedTokenIDToIdentifier && II->getTokenID() != tok::identifier)
II->revertTokenIDToIdentifier();
if (!F.isModule())
II->setObjCOrBuiltinID(ObjCOrBuiltinID);
assert(II->isExtensionToken() == ExtensionToken &&
"Incorrect extension token flag");
(void)ExtensionToken;
if (Poisoned)
II->setIsPoisoned(true);
assert(II->isCPlusPlusOperatorKeyword() == CPlusPlusOperatorKeyword &&
"Incorrect C++ operator keyword flag");
(void)CPlusPlusOperatorKeyword;
// If this identifier is a macro, deserialize the macro
// definition.
if (HadMacroDefinition) {
uint32_t MacroDirectivesOffset =
endian::readNext<uint32_t, little, unaligned>(d);
DataLen -= 4;
Reader.addPendingMacro(II, &F, MacroDirectivesOffset);
}
Reader.SetIdentifierInfo(ID, II);
// Read all of the declarations visible at global scope with this
// name.
if (DataLen > 0) {
SmallVector<uint32_t, 4> DeclIDs;
for (; DataLen > 0; DataLen -= 4)
DeclIDs.push_back(Reader.getGlobalDeclID(
F, endian::readNext<uint32_t, little, unaligned>(d)));
Reader.SetGloballyVisibleDecls(II, DeclIDs);
}
return II;
}
DeclarationNameKey::DeclarationNameKey(DeclarationName Name)
: Kind(Name.getNameKind()) {
switch (Kind) {
case DeclarationName::Identifier:
Data = (uint64_t)Name.getAsIdentifierInfo();
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
Data = (uint64_t)Name.getObjCSelector().getAsOpaquePtr();
break;
case DeclarationName::CXXOperatorName:
Data = Name.getCXXOverloadedOperator();
break;
case DeclarationName::CXXLiteralOperatorName:
Data = (uint64_t)Name.getCXXLiteralIdentifier();
break;
case DeclarationName::CXXDeductionGuideName:
Data = (uint64_t)Name.getCXXDeductionGuideTemplate()
->getDeclName().getAsIdentifierInfo();
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
Data = 0;
break;
}
}
unsigned DeclarationNameKey::getHash() const {
llvm::FoldingSetNodeID ID;
ID.AddInteger(Kind);
switch (Kind) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
ID.AddString(((IdentifierInfo*)Data)->getName());
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
ID.AddInteger(serialization::ComputeHash(Selector(Data)));
break;
case DeclarationName::CXXOperatorName:
ID.AddInteger((OverloadedOperatorKind)Data);
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
break;
}
return ID.ComputeHash();
}
ModuleFile *
ASTDeclContextNameLookupTrait::ReadFileRef(const unsigned char *&d) {
using namespace llvm::support;
uint32_t ModuleFileID = endian::readNext<uint32_t, little, unaligned>(d);
return Reader.getLocalModuleFile(F, ModuleFileID);
}
std::pair<unsigned, unsigned>
ASTDeclContextNameLookupTrait::ReadKeyDataLength(const unsigned char *&d) {
return readULEBKeyDataLength(d);
}
ASTDeclContextNameLookupTrait::internal_key_type
ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) {
using namespace llvm::support;
auto Kind = (DeclarationName::NameKind)*d++;
uint64_t Data;
switch (Kind) {
case DeclarationName::Identifier:
case DeclarationName::CXXLiteralOperatorName:
case DeclarationName::CXXDeductionGuideName:
Data = (uint64_t)Reader.getLocalIdentifier(
F, endian::readNext<uint32_t, little, unaligned>(d));
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
Data =
(uint64_t)Reader.getLocalSelector(
F, endian::readNext<uint32_t, little, unaligned>(
d)).getAsOpaquePtr();
break;
case DeclarationName::CXXOperatorName:
Data = *d++; // OverloadedOperatorKind
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXUsingDirective:
Data = 0;
break;
}
return DeclarationNameKey(Kind, Data);
}
void ASTDeclContextNameLookupTrait::ReadDataInto(internal_key_type,
const unsigned char *d,
unsigned DataLen,
data_type_builder &Val) {
using namespace llvm::support;
for (unsigned NumDecls = DataLen / 4; NumDecls; --NumDecls) {
uint32_t LocalID = endian::readNext<uint32_t, little, unaligned>(d);
Val.insert(Reader.getGlobalDeclID(F, LocalID));
}
}
bool ASTReader::ReadLexicalDeclContextStorage(ModuleFile &M,
BitstreamCursor &Cursor,
uint64_t Offset,
DeclContext *DC) {
assert(Offset != 0);
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
Error(std::move(Err));
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return true;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CONTEXT_LEXICAL) {
Error("Expected lexical block");
return true;
}
assert(!isa<TranslationUnitDecl>(DC) &&
"expected a TU_UPDATE_LEXICAL record for TU");
// If we are handling a C++ class template instantiation, we can see multiple
// lexical updates for the same record. It's important that we select only one
// of them, so that field numbering works properly. Just pick the first one we
// see.
auto &Lex = LexicalDecls[DC];
if (!Lex.first) {
Lex = std::make_pair(
&M, llvm::makeArrayRef(
reinterpret_cast<const llvm::support::unaligned_uint32_t *>(
Blob.data()),
Blob.size() / 4));
}
DC->setHasExternalLexicalStorage(true);
return false;
}
bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M,
BitstreamCursor &Cursor,
uint64_t Offset,
DeclID ID) {
assert(Offset != 0);
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
Error(std::move(Err));
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return true;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CONTEXT_VISIBLE) {
Error("Expected visible lookup table block");
return true;
}
// We can't safely determine the primary context yet, so delay attaching the
// lookup table until we're done with recursive deserialization.
auto *Data = (const unsigned char*)Blob.data();
PendingVisibleUpdates[ID].push_back(PendingVisibleUpdate{&M, Data});
return false;
}
void ASTReader::Error(StringRef Msg) const {
Error(diag::err_fe_pch_malformed, Msg);
if (PP.getLangOpts().Modules && !Diags.isDiagnosticInFlight() &&
!PP.getHeaderSearchInfo().getModuleCachePath().empty()) {
Diag(diag::note_module_cache_path)
<< PP.getHeaderSearchInfo().getModuleCachePath();
}
}
void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
StringRef Arg3) const {
if (Diags.isDiagnosticInFlight())
Diags.SetDelayedDiagnostic(DiagID, Arg1, Arg2, Arg3);
else
Diag(DiagID) << Arg1 << Arg2 << Arg3;
}
void ASTReader::Error(llvm::Error &&Err) const {
Error(toString(std::move(Err)));
}
//===----------------------------------------------------------------------===//
// Source Manager Deserialization
//===----------------------------------------------------------------------===//
/// Read the line table in the source manager block.
/// \returns true if there was an error.
bool ASTReader::ParseLineTable(ModuleFile &F,
const RecordData &Record) {
unsigned Idx = 0;
LineTableInfo &LineTable = SourceMgr.getLineTable();
// Parse the file names
std::map<int, int> FileIDs;
FileIDs[-1] = -1; // For unspecified filenames.
for (unsigned I = 0; Record[Idx]; ++I) {
// Extract the file name
auto Filename = ReadPath(F, Record, Idx);
FileIDs[I] = LineTable.getLineTableFilenameID(Filename);
}
++Idx;
// Parse the line entries
std::vector<LineEntry> Entries;
while (Idx < Record.size()) {
int FID = Record[Idx++];
assert(FID >= 0 && "Serialized line entries for non-local file.");
// Remap FileID from 1-based old view.
FID += F.SLocEntryBaseID - 1;
// Extract the line entries
unsigned NumEntries = Record[Idx++];
assert(NumEntries && "no line entries for file ID");
Entries.clear();
Entries.reserve(NumEntries);
for (unsigned I = 0; I != NumEntries; ++I) {
unsigned FileOffset = Record[Idx++];
unsigned LineNo = Record[Idx++];
int FilenameID = FileIDs[Record[Idx++]];
SrcMgr::CharacteristicKind FileKind
= (SrcMgr::CharacteristicKind)Record[Idx++];
unsigned IncludeOffset = Record[Idx++];
Entries.push_back(LineEntry::get(FileOffset, LineNo, FilenameID,
FileKind, IncludeOffset));
}
LineTable.AddEntry(FileID::get(FID), Entries);
}
return false;
}
/// Read a source manager block
bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
using namespace SrcMgr;
BitstreamCursor &SLocEntryCursor = F.SLocEntryCursor;
// Set the source-location entry cursor to the current position in
// the stream. This cursor will be used to read the contents of the
// source manager block initially, and then lazily read
// source-location entries as needed.
SLocEntryCursor = F.Stream;
// The stream itself is going to skip over the source manager block.
if (llvm::Error Err = F.Stream.SkipBlock()) {
Error(std::move(Err));
return true;
}
// Enter the source manager block.
if (llvm::Error Err =
SLocEntryCursor.EnterSubBlock(SOURCE_MANAGER_BLOCK_ID)) {
Error(std::move(Err));
return true;
}
F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo();
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeE =
SLocEntryCursor.advanceSkippingSubblocks();
if (!MaybeE) {
Error(MaybeE.takeError());
return true;
}
llvm::BitstreamEntry E = MaybeE.get();
switch (E.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return true;
case llvm::BitstreamEntry::EndBlock:
return false;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord =
SLocEntryCursor.readRecord(E.ID, Record, &Blob);
if (!MaybeRecord) {
Error(MaybeRecord.takeError());
return true;
}
switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case SM_SLOC_FILE_ENTRY:
case SM_SLOC_BUFFER_ENTRY:
case SM_SLOC_EXPANSION_ENTRY:
// Once we hit one of the source location entries, we're done.
return false;
}
}
}
/// If a header file is not found at the path that we expect it to be
/// and the PCH file was moved from its original location, try to resolve the
/// file by assuming that header+PCH were moved together and the header is in
/// the same place relative to the PCH.
static std::string
resolveFileRelativeToOriginalDir(const std::string &Filename,
const std::string &OriginalDir,
const std::string &CurrDir) {
assert(OriginalDir != CurrDir &&
"No point trying to resolve the file if the PCH dir didn't change");
using namespace llvm::sys;
SmallString<128> filePath(Filename);
fs::make_absolute(filePath);
assert(path::is_absolute(OriginalDir));
SmallString<128> currPCHPath(CurrDir);
path::const_iterator fileDirI = path::begin(path::parent_path(filePath)),
fileDirE = path::end(path::parent_path(filePath));
path::const_iterator origDirI = path::begin(OriginalDir),
origDirE = path::end(OriginalDir);
// Skip the common path components from filePath and OriginalDir.
while (fileDirI != fileDirE && origDirI != origDirE &&
*fileDirI == *origDirI) {
++fileDirI;
++origDirI;
}
for (; origDirI != origDirE; ++origDirI)
path::append(currPCHPath, "..");
path::append(currPCHPath, fileDirI, fileDirE);
path::append(currPCHPath, path::filename(Filename));
return std::string(currPCHPath.str());
}
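// Illustrative sketch, not from the patch above: the same relocation idea as
// resolveFileRelativeToOriginalDir, using std::filesystem instead of
// llvm::sys::path. Strip the components the header shares with the PCH's
// original directory, then re-root the remainder under the current directory.
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

static fs::path relocate(const fs::path &Header, const fs::path &OrigDir,
                         const fs::path &CurrDir) {
  auto H = Header.begin(), HE = Header.end();
  auto O = OrigDir.begin(), OE = OrigDir.end();
  while (H != HE && O != OE && *H == *O) { ++H; ++O; } // skip the shared prefix
  fs::path Result = CurrDir;
  for (; O != OE; ++O)
    Result /= "..";  // climb out of the original components that didn't match
  for (; H != HE; ++H)
    Result /= *H;    // append the header's remaining components
  return Result;
}

int main() {
  // A header from /old/proj after the PCH moved from /old/proj to /new/tree.
  std::cout << relocate("/old/proj/include/foo.h", "/old/proj", "/new/tree")
            << "\n"; // "/new/tree/include/foo.h"
}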
bool ASTReader::ReadSLocEntry(int ID) {
if (ID == 0)
return false;
if (unsigned(-ID) - 2 >= getTotalNumSLocs() || ID > 0) {
Error("source location entry ID out-of-range for AST file");
return true;
}
// Local helper to read the (possibly-compressed) buffer data following the
// entry record.
auto ReadBuffer = [this](
BitstreamCursor &SLocEntryCursor,
StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeCode = SLocEntryCursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
Expected<unsigned> MaybeRecCode =
SLocEntryCursor.readRecord(Code, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
if (!llvm::zlib::isAvailable()) {
Error("zlib is not available");
return nullptr;
}
SmallString<0> Uncompressed;
if (llvm::Error E =
llvm::zlib::uncompress(Blob, Uncompressed, Record[0])) {
Error("could not decompress embedded file contents: " +
llvm::toString(std::move(E)));
return nullptr;
}
return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
} else if (RecCode == SM_SLOC_BUFFER_BLOB) {
return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
} else {
Error("AST record has invalid code");
return nullptr;
}
};
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
if (llvm::Error Err = F->SLocEntryCursor.JumpToBit(
F->SLocEntryOffsetsBase +
F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) {
Error(std::move(Err));
return true;
}
BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
SourceLocation::UIntTy BaseOffset = F->SLocEntryBaseOffset;
++NumSLocEntriesRead;
Expected<llvm::BitstreamEntry> MaybeEntry = SLocEntryCursor.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Error("incorrectly-formatted source location entry in AST file");
return true;
}
RecordData Record;
StringRef Blob;
Expected<unsigned> MaybeSLOC =
SLocEntryCursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeSLOC) {
Error(MaybeSLOC.takeError());
return true;
}
switch (MaybeSLOC.get()) {
default:
Error("incorrectly-formatted source location entry in AST file");
return true;
case SM_SLOC_FILE_ENTRY: {
// We will detect whether a file changed and return 'Failure' for it, but
// we will also try to fail gracefully by setting up the SLocEntry.
unsigned InputID = Record[4];
InputFile IF = getInputFile(*F, InputID);
Optional<FileEntryRef> File = IF.getFile();
bool OverriddenBuffer = IF.isOverridden();
// Note that we only check if a File was returned. If it was out-of-date
// we have complained but we will continue creating a FileID to recover
// gracefully.
if (!File)
return true;
SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
if (IncludeLoc.isInvalid() && F->Kind != MK_MainFile) {
// This is the module's main file.
IncludeLoc = getImportLocation(F);
}
SrcMgr::CharacteristicKind
FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
FileID FID = SourceMgr.createFileID(*File, IncludeLoc, FileCharacter, ID,
BaseOffset + Record[0]);
SrcMgr::FileInfo &FileInfo =
const_cast<SrcMgr::FileInfo&>(SourceMgr.getSLocEntry(FID).getFile());
FileInfo.NumCreatedFIDs = Record[5];
if (Record[3])
FileInfo.setHasLineDirectives();
unsigned NumFileDecls = Record[7];
if (NumFileDecls && ContextObj) {
const DeclID *FirstDecl = F->FileSortedDecls + Record[6];
assert(F->FileSortedDecls && "FILE_SORTED_DECLS not encountered yet ?");
FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
NumFileDecls));
}
const SrcMgr::ContentCache &ContentCache =
SourceMgr.getOrCreateContentCache(*File, isSystem(FileCharacter));
if (OverriddenBuffer && !ContentCache.BufferOverridden &&
ContentCache.ContentsEntry == ContentCache.OrigEntry &&
!ContentCache.getBufferIfLoaded()) {
auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
if (!Buffer)
return true;
SourceMgr.overrideFileContents(*File, std::move(Buffer));
}
break;
}
case SM_SLOC_BUFFER_ENTRY: {
const char *Name = Blob.data();
unsigned Offset = Record[0];
SrcMgr::CharacteristicKind
FileCharacter = (SrcMgr::CharacteristicKind)Record[2];
SourceLocation IncludeLoc = ReadSourceLocation(*F, Record[1]);
if (IncludeLoc.isInvalid() && F->isModule()) {
IncludeLoc = getImportLocation(F);
}
auto Buffer = ReadBuffer(SLocEntryCursor, Name);
if (!Buffer)
return true;
SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
BaseOffset + Offset, IncludeLoc);
break;
}
case SM_SLOC_EXPANSION_ENTRY: {
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
SourceMgr.createExpansionLoc(SpellingLoc,
ReadSourceLocation(*F, Record[2]),
ReadSourceLocation(*F, Record[3]),
Record[5],
Record[4],
ID,
BaseOffset + Record[0]);
break;
}
}
return false;
}
std::pair<SourceLocation, StringRef> ASTReader::getModuleImportLoc(int ID) {
if (ID == 0)
return std::make_pair(SourceLocation(), "");
if (unsigned(-ID) - 2 >= getTotalNumSLocs() || ID > 0) {
Error("source location entry ID out-of-range for AST file");
return std::make_pair(SourceLocation(), "");
}
// Find which module file this entry lands in.
ModuleFile *M = GlobalSLocEntryMap.find(-ID)->second;
if (!M->isModule())
return std::make_pair(SourceLocation(), "");
// FIXME: Can we map this down to a particular submodule? That would be
// ideal.
return std::make_pair(M->ImportLoc, StringRef(M->ModuleName));
}
/// Find the location where the module F is imported.
SourceLocation ASTReader::getImportLocation(ModuleFile *F) {
if (F->ImportLoc.isValid())
return F->ImportLoc;
// Otherwise we have a PCH. It's considered to be "imported" at the first
// location of its includer.
if (F->ImportedBy.empty() || !F->ImportedBy[0]) {
// Main file is the importer.
assert(SourceMgr.getMainFileID().isValid() && "missing main file");
return SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
}
return F->ImportedBy[0]->FirstLoc;
}
/// Enter a subblock of the specified BlockID with the specified cursor. Read
/// the abbreviations that are at the top of the block and then leave the cursor
/// pointing into the block.
bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
uint64_t *StartOfBlockOffset) {
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
if (StartOfBlockOffset)
*StartOfBlockOffset = Cursor.GetCurrentBitNo();
while (true) {
uint64_t Offset = Cursor.GetCurrentBitNo();
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
return true;
}
unsigned Code = MaybeCode.get();
// We expect all abbrevs to be at the start of the block.
if (Code != llvm::bitc::DEFINE_ABBREV) {
if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
return false;
}
if (llvm::Error Err = Cursor.ReadAbbrevRecord()) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
}
}
Token ASTReader::ReadToken(ModuleFile &F, const RecordDataImpl &Record,
unsigned &Idx) {
Token Tok;
Tok.startToken();
Tok.setLocation(ReadSourceLocation(F, Record, Idx));
Tok.setLength(Record[Idx++]);
if (IdentifierInfo *II = getLocalIdentifier(F, Record[Idx++]))
Tok.setIdentifierInfo(II);
Tok.setKind((tok::TokenKind)Record[Idx++]);
Tok.setFlag((Token::TokenFlags)Record[Idx++]);
return Tok;
}
MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
BitstreamCursor &Stream = F.MacroCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this macro.
SavedStreamPosition SavedPosition(Stream);
if (llvm::Error Err = Stream.JumpToBit(Offset)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return nullptr;
}
RecordData Record;
SmallVector<IdentifierInfo*, 16> MacroParams;
MacroInfo *Macro = nullptr;
while (true) {
// Advance to the next record, but if we get to the end of the block, don't
// pop it (removing all the abbreviations from the cursor) since we want to
// be able to reseek within the block and read entries.
unsigned Flags = BitstreamCursor::AF_DontPopBlockAtEnd;
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks(Flags);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Macro;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Macro;
case llvm::BitstreamEntry::EndBlock:
return Macro;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
PreprocessorRecordTypes RecType;
if (Expected<unsigned> MaybeRecType = Stream.readRecord(Entry.ID, Record))
RecType = (PreprocessorRecordTypes)MaybeRecType.get();
else {
Error(MaybeRecType.takeError());
return Macro;
}
switch (RecType) {
case PP_MODULE_MACRO:
case PP_MACRO_DIRECTIVE_HISTORY:
return Macro;
case PP_MACRO_OBJECT_LIKE:
case PP_MACRO_FUNCTION_LIKE: {
// If we already have a macro, that means that we've hit the end
// of the definition of the macro we were looking for. We're
// done.
if (Macro)
return Macro;
unsigned NextIndex = 1; // Skip identifier ID.
SourceLocation Loc = ReadSourceLocation(F, Record, NextIndex);
MacroInfo *MI = PP.AllocateMacroInfo(Loc);
MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
MI->setIsUsed(Record[NextIndex++]);
MI->setUsedForHeaderGuard(Record[NextIndex++]);
if (RecType == PP_MACRO_FUNCTION_LIKE) {
// Decode function-like macro info.
bool isC99VarArgs = Record[NextIndex++];
bool isGNUVarArgs = Record[NextIndex++];
bool hasCommaPasting = Record[NextIndex++];
MacroParams.clear();
unsigned NumArgs = Record[NextIndex++];
for (unsigned i = 0; i != NumArgs; ++i)
MacroParams.push_back(getLocalIdentifier(F, Record[NextIndex++]));
// Install function-like macro info.
MI->setIsFunctionLike();
if (isC99VarArgs) MI->setIsC99Varargs();
if (isGNUVarArgs) MI->setIsGNUVarargs();
if (hasCommaPasting) MI->setHasCommaPasting();
MI->setParameterList(MacroParams, PP.getPreprocessorAllocator());
}
// Remember that we saw this macro last so that we add the tokens that
// form its body to it.
Macro = MI;
if (NextIndex + 1 == Record.size() && PP.getPreprocessingRecord() &&
Record[NextIndex]) {
// We have a macro definition. Register the association.
PreprocessedEntityID
GlobalID = getGlobalPreprocessedEntityID(F, Record[NextIndex]);
PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
PreprocessingRecord::PPEntityID PPID =
PPRec.getPPEntityID(GlobalID - 1, /*isLoaded=*/true);
MacroDefinitionRecord *PPDef = cast_or_null<MacroDefinitionRecord>(
PPRec.getPreprocessedEntity(PPID));
if (PPDef)
PPRec.RegisterMacroDefinition(Macro, PPDef);
}
++NumMacrosRead;
break;
}
case PP_TOKEN: {
// If we see a TOKEN before a PP_MACRO_*, then the file is
// erroneous; just pretend we didn't see this.
if (!Macro) break;
unsigned Idx = 0;
Token Tok = ReadToken(F, Record, Idx);
Macro->AddTokenToBody(Tok);
break;
}
}
}
}
PreprocessedEntityID
ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M,
unsigned LocalID) const {
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::const_iterator
I = M.PreprocessedEntityRemap.find(LocalID - NUM_PREDEF_PP_ENTITY_IDS);
assert(I != M.PreprocessedEntityRemap.end()
&& "Invalid index into preprocessed entity index remap");
return LocalID + I->second;
}
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
return llvm::hash_combine(ikey.Size, ikey.ModTime);
}
HeaderFileInfoTrait::internal_key_type
HeaderFileInfoTrait::GetInternalKey(const FileEntry *FE) {
internal_key_type ikey = {FE->getSize(),
M.HasTimestamps ? FE->getModificationTime() : 0,
FE->getName(), /*Imported*/ false};
return ikey;
}
bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
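// Keys can only match if the sizes agree and, when both modification times
// are known (non-zero), the times agree as well.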
if (a.Size != b.Size || (a.ModTime && b.ModTime && a.ModTime != b.ModTime))
return false;
if (llvm::sys::path::is_absolute(a.Filename) && a.Filename == b.Filename)
return true;
// Determine whether the actual files are equivalent.
FileManager &FileMgr = Reader.getFileManager();
auto GetFile = [&](const internal_key_type &Key) -> const FileEntry* {
if (!Key.Imported) {
if (auto File = FileMgr.getFile(Key.Filename))
return *File;
return nullptr;
}
std::string Resolved = std::string(Key.Filename);
Reader.ResolveImportedPath(M, Resolved);
if (auto File = FileMgr.getFile(Resolved))
return *File;
return nullptr;
};
const FileEntry *FEA = GetFile(a);
const FileEntry *FEB = GetFile(b);
return FEA && FEA == FEB;
}
std::pair<unsigned, unsigned>
HeaderFileInfoTrait::ReadKeyDataLength(const unsigned char*& d) {
return readULEBKeyDataLength(d);
}
HeaderFileInfoTrait::internal_key_type
HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) {
using namespace llvm::support;
internal_key_type ikey;
ikey.Size = off_t(endian::readNext<uint64_t, little, unaligned>(d));
ikey.ModTime = time_t(endian::readNext<uint64_t, little, unaligned>(d));
ikey.Filename = (const char *)d;
ikey.Imported = true;
return ikey;
}
HeaderFileInfoTrait::data_type
HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
unsigned DataLen) {
using namespace llvm::support;
const unsigned char *End = d + DataLen;
HeaderFileInfo HFI;
unsigned Flags = *d++;
// FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
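// Flags layout: bit 5 = isImport, bit 4 = isPragmaOnce, bits 1-3 = DirInfo,
// bit 0 = IndexHeaderMapHeader.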
HFI.isImport |= (Flags >> 5) & 0x01;
HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
HFI.DirInfo = (Flags >> 1) & 0x07;
HFI.IndexHeaderMapHeader = Flags & 0x01;
// FIXME: Find a better way to handle this. Maybe just store a
// "has been included" flag?
HFI.NumIncludes = std::max(endian::readNext<uint16_t, little, unaligned>(d),
HFI.NumIncludes);
HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
M, endian::readNext<uint32_t, little, unaligned>(d));
if (unsigned FrameworkOffset =
endian::readNext<uint32_t, little, unaligned>(d)) {
// The framework offset is 1 greater than the actual offset,
// since 0 is used as an indicator for "no framework name".
StringRef FrameworkName(FrameworkStrings + FrameworkOffset - 1);
HFI.Framework = HS->getUniqueFrameworkName(FrameworkName);
}
assert((End - d) % 4 == 0 &&
"Wrong data length in HeaderFileInfo deserialization");
while (d != End) {
uint32_t LocalSMID = endian::readNext<uint32_t, little, unaligned>(d);
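// The low two bits encode the header role; the remaining bits are the local
// submodule ID.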
auto HeaderRole = static_cast<ModuleMap::ModuleHeaderRole>(LocalSMID & 3);
LocalSMID >>= 2;
// This header is part of a module. Associate it with the module to enable
// implicit module import.
SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
Module *Mod = Reader.getSubmodule(GlobalSMID);
FileManager &FileMgr = Reader.getFileManager();
ModuleMap &ModMap =
Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
std::string Filename = std::string(key.Filename);
if (key.Imported)
Reader.ResolveImportedPath(M, Filename);
// FIXME: NameAsWritten
Module::Header H = {std::string(key.Filename), "",
*FileMgr.getFile(Filename)};
ModMap.addHeader(Mod, H, HeaderRole, /*Imported*/true);
HFI.isModuleHeader |= !(HeaderRole & ModuleMap::TextualHeader);
}
// This HeaderFileInfo was externally loaded.
HFI.External = true;
HFI.IsValid = true;
return HFI;
}
void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M,
uint32_t MacroDirectivesOffset) {
assert(NumCurrentElementsDeserializing > 0 && "Missing deserialization guard");
PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset));
}
void ASTReader::ReadDefinedMacros() {
// Note that we are loading defined macros.
Deserializing Macros(this);
for (ModuleFile &I : llvm::reverse(ModuleMgr)) {
BitstreamCursor &MacroCursor = I.MacroCursor;
// If there was no preprocessor block, skip this file.
if (MacroCursor.getBitcodeBytes().empty())
continue;
BitstreamCursor Cursor = MacroCursor;
if (llvm::Error Err = Cursor.JumpToBit(I.MacroStartOffset)) {
Error(std::move(Err));
return;
}
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeE = Cursor.advanceSkippingSubblocks();
if (!MaybeE) {
Error(MaybeE.takeError());
return;
}
llvm::BitstreamEntry E = MaybeE.get();
switch (E.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return;
case llvm::BitstreamEntry::EndBlock:
goto NextCursor;
case llvm::BitstreamEntry::Record: {
Record.clear();
Expected<unsigned> MaybeRecord = Cursor.readRecord(E.ID, Record);
if (!MaybeRecord) {
Error(MaybeRecord.takeError());
return;
}
switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case PP_MACRO_OBJECT_LIKE:
case PP_MACRO_FUNCTION_LIKE: {
IdentifierInfo *II = getLocalIdentifier(I, Record[0]);
if (II->isOutOfDate())
updateOutOfDateIdentifier(*II);
break;
}
case PP_TOKEN:
// Ignore tokens.
break;
}
break;
}
}
}
NextCursor: ;
}
}
namespace {
/// Visitor class used to look up identifiers in an AST file.
class IdentifierLookupVisitor {
StringRef Name;
unsigned NameHash;
unsigned PriorGeneration;
unsigned &NumIdentifierLookups;
unsigned &NumIdentifierLookupHits;
IdentifierInfo *Found = nullptr;
public:
IdentifierLookupVisitor(StringRef Name, unsigned PriorGeneration,
unsigned &NumIdentifierLookups,
unsigned &NumIdentifierLookupHits)
: Name(Name), NameHash(ASTIdentifierLookupTrait::ComputeHash(Name)),
PriorGeneration(PriorGeneration),
NumIdentifierLookups(NumIdentifierLookups),
NumIdentifierLookupHits(NumIdentifierLookupHits) {}
bool operator()(ModuleFile &M) {
// If we've already searched this module file, skip it now.
if (M.Generation <= PriorGeneration)
return true;
ASTIdentifierLookupTable *IdTable
= (ASTIdentifierLookupTable *)M.IdentifierLookupTable;
if (!IdTable)
return false;
ASTIdentifierLookupTrait Trait(IdTable->getInfoObj().getReader(), M,
Found);
++NumIdentifierLookups;
ASTIdentifierLookupTable::iterator Pos =
IdTable->find_hashed(Name, NameHash, &Trait);
if (Pos == IdTable->end())
return false;
// Dereferencing the iterator has the effect of building the
// IdentifierInfo node and populating it with the various
// declarations it needs.
++NumIdentifierLookupHits;
Found = *Pos;
return true;
}
// Retrieve the identifier info found within the module
// files.
IdentifierInfo *getIdentifierInfo() const { return Found; }
};
} // namespace
void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) {
// Note that we are loading an identifier.
Deserializing AnIdentifier(this);
unsigned PriorGeneration = 0;
if (getContext().getLangOpts().Modules)
PriorGeneration = IdentifierGeneration[&II];
// If there is a global index, look there first to determine which modules
// provably do not have any results for this identifier.
GlobalModuleIndex::HitSet Hits;
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
if (!loadGlobalIndex()) {
if (GlobalIndex->lookupIdentifier(II.getName(), Hits)) {
HitsPtr = &Hits;
}
}
IdentifierLookupVisitor Visitor(II.getName(), PriorGeneration,
NumIdentifierLookups,
NumIdentifierLookupHits);
ModuleMgr.visit(Visitor, HitsPtr);
markIdentifierUpToDate(&II);
}
void ASTReader::markIdentifierUpToDate(IdentifierInfo *II) {
if (!II)
return;
II->setOutOfDate(false);
// Update the generation for this identifier.
if (getContext().getLangOpts().Modules)
IdentifierGeneration[II] = getGeneration();
}
void ASTReader::resolvePendingMacro(IdentifierInfo *II,
const PendingMacroInfo &PMInfo) {
ModuleFile &M = *PMInfo.M;
BitstreamCursor &Cursor = M.MacroCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err =
Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) {
Error(std::move(Err));
return;
}
struct ModuleMacroRecord {
SubmoduleID SubModID;
MacroInfo *MI;
SmallVector<SubmoduleID, 8> Overrides;
};
llvm::SmallVector<ModuleMacroRecord, 8> ModuleMacros;
// We expect to see a sequence of PP_MODULE_MACRO records listing exported
// macros, followed by a PP_MACRO_DIRECTIVE_HISTORY record with the complete
// macro history.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Cursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Error("malformed block record in AST file");
return;
}
Record.clear();
Expected<unsigned> MaybePP = Cursor.readRecord(Entry.ID, Record);
if (!MaybePP) {
Error(MaybePP.takeError());
return;
}
switch ((PreprocessorRecordTypes)MaybePP.get()) {
case PP_MACRO_DIRECTIVE_HISTORY:
break;
case PP_MODULE_MACRO: {
ModuleMacros.push_back(ModuleMacroRecord());
auto &Info = ModuleMacros.back();
Info.SubModID = getGlobalSubmoduleID(M, Record[0]);
Info.MI = getMacro(getGlobalMacroID(M, Record[1]));
for (int I = 2, N = Record.size(); I != N; ++I)
Info.Overrides.push_back(getGlobalSubmoduleID(M, Record[I]));
continue;
}
default:
Error("malformed block record in AST file");
return;
}
// We found the macro directive history; that's the last record
// for this macro.
break;
}
// Module macros are listed in reverse dependency order.
{
std::reverse(ModuleMacros.begin(), ModuleMacros.end());
llvm::SmallVector<ModuleMacro*, 8> Overrides;
for (auto &MMR : ModuleMacros) {
Overrides.clear();
for (unsigned ModID : MMR.Overrides) {
Module *Mod = getSubmodule(ModID);
auto *Macro = PP.getModuleMacro(Mod, II);
assert(Macro && "missing definition for overridden macro");
Overrides.push_back(Macro);
}
bool Inserted = false;
Module *Owner = getSubmodule(MMR.SubModID);
PP.addModuleMacro(Owner, II, MMR.MI, Overrides, Inserted);
}
}
// Don't read the directive history for a module; we don't have anywhere
// to put it.
if (M.isModule())
return;
// Deserialize the macro directives history in reverse source-order.
MacroDirective *Latest = nullptr, *Earliest = nullptr;
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
MacroDirective *MD = nullptr;
SourceLocation Loc = ReadSourceLocation(M, Record, Idx);
MacroDirective::Kind K = (MacroDirective::Kind)Record[Idx++];
switch (K) {
case MacroDirective::MD_Define: {
MacroInfo *MI = getMacro(getGlobalMacroID(M, Record[Idx++]));
MD = PP.AllocateDefMacroDirective(MI, Loc);
break;
}
case MacroDirective::MD_Undefine:
MD = PP.AllocateUndefMacroDirective(Loc);
break;
case MacroDirective::MD_Visibility:
bool isPublic = Record[Idx++];
MD = PP.AllocateVisibilityMacroDirective(Loc, isPublic);
break;
}
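// Directives were emitted in reverse source order, so the first one read is
// the latest; each subsequent directive becomes the previous of the one read
// before it.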
if (!Latest)
Latest = MD;
if (Earliest)
Earliest->setPrevious(MD);
Earliest = MD;
}
if (Latest)
PP.setLoadedMacroDirective(II, Earliest, Latest);
}
bool ASTReader::shouldDisableValidationForFile(
const serialization::ModuleFile &M) const {
if (DisableValidationKind == DisableValidationForModuleKind::None)
return false;
// If a PCH is loaded and validation is disabled for PCH then disable
// validation for the PCH and the modules it loads.
ModuleKind K = CurrentDeserializingModuleKind.getValueOr(M.Kind);
switch (K) {
case MK_MainFile:
case MK_Preamble:
case MK_PCH:
return bool(DisableValidationKind & DisableValidationForModuleKind::PCH);
case MK_ImplicitModule:
case MK_ExplicitModule:
case MK_PrebuiltModule:
return bool(DisableValidationKind & DisableValidationForModuleKind::Module);
}
return false;
}
ASTReader::InputFileInfo
ASTReader::readInputFileInfo(ModuleFile &F, unsigned ID) {
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
}
unsigned Code = MaybeCode.get();
RecordData Record;
StringRef Blob;
if (Expected<unsigned> Maybe = Cursor.readRecord(Code, Record, &Blob))
assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE &&
"invalid record type for input file");
else {
// FIXME this drops errors on the floor.
consumeError(Maybe.takeError());
}
assert(Record[0] == ID && "Bogus stored ID or offset");
InputFileInfo R;
R.StoredSize = static_cast<off_t>(Record[1]);
R.StoredTime = static_cast<time_t>(Record[2]);
R.Overridden = static_cast<bool>(Record[3]);
R.Transient = static_cast<bool>(Record[4]);
R.TopLevelModuleMap = static_cast<bool>(Record[5]);
R.Filename = std::string(Blob);
ResolveImportedPath(F, R.Filename);
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) // FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
llvm::BitstreamEntry Entry = MaybeEntry.get();
assert(Entry.Kind == llvm::BitstreamEntry::Record &&
"expected record type for input file hash");
Record.clear();
if (Expected<unsigned> Maybe = Cursor.readRecord(Entry.ID, Record))
assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE_HASH &&
"invalid record type for input file hash");
else {
// FIXME this drops errors on the floor.
consumeError(Maybe.takeError());
}
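// The 64-bit content hash is stored as two 32-bit words, low word first.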
R.ContentHash = (static_cast<uint64_t>(Record[1]) << 32) |
static_cast<uint64_t>(Record[0]);
return R;
}
static unsigned moduleKindForDiagnostic(ModuleKind Kind);
InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
// If this ID is bogus, just return an empty input file.
if (ID == 0 || ID > F.InputFilesLoaded.size())
return InputFile();
// If we've already loaded this input file, return it.
if (F.InputFilesLoaded[ID-1].getFile())
return F.InputFilesLoaded[ID-1];
if (F.InputFilesLoaded[ID-1].isNotFound())
return InputFile();
// Go find this input file.
BitstreamCursor &Cursor = F.InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(F.InputFileOffsets[ID - 1])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
InputFileInfo FI = readInputFileInfo(F, ID);
off_t StoredSize = FI.StoredSize;
time_t StoredTime = FI.StoredTime;
bool Overridden = FI.Overridden;
bool Transient = FI.Transient;
StringRef Filename = FI.Filename;
uint64_t StoredContentHash = FI.ContentHash;
OptionalFileEntryRefDegradesToFileEntryPtr File =
expectedToOptional(FileMgr.getFileRef(Filename, /*OpenFile=*/false));
// If we didn't find the file, resolve it relative to the
// original directory from which this AST file was created.
if (!File && !F.OriginalDir.empty() && !F.BaseDirectory.empty() &&
F.OriginalDir != F.BaseDirectory) {
std::string Resolved = resolveFileRelativeToOriginalDir(
std::string(Filename), F.OriginalDir, F.BaseDirectory);
if (!Resolved.empty())
File = expectedToOptional(FileMgr.getFileRef(Resolved));
}
// For an overridden file, create a virtual file with the stored
// size/timestamp.
if ((Overridden || Transient) && !File)
File = FileMgr.getVirtualFileRef(Filename, StoredSize, StoredTime);
if (!File) {
if (Complain) {
std::string ErrorStr = "could not find file '";
ErrorStr += Filename;
ErrorStr += "' referenced by AST file '";
ErrorStr += F.FileName;
ErrorStr += "'";
Error(ErrorStr);
}
// Record that we didn't find the file.
F.InputFilesLoaded[ID-1] = InputFile::getNotFound();
return InputFile();
}
// Check if there was a request to override the contents of the file
// that was part of the precompiled header. Overriding such a file
// can lead to problems when lexing using the source locations from the
// PCH.
SourceManager &SM = getSourceManager();
// FIXME: Reject if the overrides are different.
if ((!Overridden && !Transient) && SM.isFileOverridden(File)) {
if (Complain)
Error(diag::err_fe_pch_file_overridden, Filename);
// After emitting the diagnostic, bypass the overriding file to recover
// (this creates a separate FileEntry).
File = SM.bypassFileContentsOverride(*File);
if (!File) {
F.InputFilesLoaded[ID - 1] = InputFile::getNotFound();
return InputFile();
}
}
enum ModificationType {
Size,
ModTime,
Content,
None,
};
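// Determine whether the stored size, timestamp, or (when requested) content
// hash no longer matches the file on disk, and if so, which kind of change
// was detected.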
auto HasInputFileChanged = [&]() {
if (StoredSize != File->getSize())
return ModificationType::Size;
if (!shouldDisableValidationForFile(F) && StoredTime &&
StoredTime != File->getModificationTime()) {
// In case the modification time changes but not the content,
// accept the cached file as legit.
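// A stored hash equal to hash_code(-1) is treated as "no content hash
// recorded", in which case we fall back to reporting a timestamp mismatch.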
if (ValidateASTInputFilesContent &&
StoredContentHash != static_cast<uint64_t>(llvm::hash_code(-1))) {
auto MemBuffOrError = FileMgr.getBufferForFile(File);
if (!MemBuffOrError) {
if (!Complain)
return ModificationType::ModTime;
std::string ErrorStr = "could not get buffer for file '";
ErrorStr += File->getName();
ErrorStr += "'";
Error(ErrorStr);
return ModificationType::ModTime;
}
auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
return ModificationType::None;
return ModificationType::Content;
}
return ModificationType::ModTime;
}
return ModificationType::None;
};
bool IsOutOfDate = false;
auto FileChange = HasInputFileChanged();
// For an overridden file, there is nothing to validate.
if (!Overridden && FileChange != ModificationType::None) {
if (Complain && !Diags.isDiagnosticInFlight()) {
// Build a list of the PCH imports that got us here (in reverse).
SmallVector<ModuleFile *, 4> ImportStack(1, &F);
while (!ImportStack.back()->ImportedBy.empty())
ImportStack.push_back(ImportStack.back()->ImportedBy[0]);
// The top-level PCH is stale.
StringRef TopLevelPCHName(ImportStack.back()->FileName);
Diag(diag::err_fe_ast_file_modified)
<< Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
<< TopLevelPCHName << FileChange;
// Print the import stack.
if (ImportStack.size() > 1) {
Diag(diag::note_pch_required_by)
<< Filename << ImportStack[0]->FileName;
for (unsigned I = 1; I < ImportStack.size(); ++I)
Diag(diag::note_pch_required_by)
<< ImportStack[I-1]->FileName << ImportStack[I]->FileName;
}
Diag(diag::note_pch_rebuild_required) << TopLevelPCHName;
}
IsOutOfDate = true;
}
// FIXME: If the file is overridden and we've already opened it,
// issue an error (or split it into a separate FileEntry).
InputFile IF = InputFile(*File, Overridden || Transient, IsOutOfDate);
// Note that we've loaded this input file.
F.InputFilesLoaded[ID-1] = IF;
return IF;
}
/// If we are loading a relocatable PCH or module file, and the filename
/// is not an absolute path, add the system or module root to the beginning of
/// the file name.
void ASTReader::ResolveImportedPath(ModuleFile &M, std::string &Filename) {
// Resolve relative to the base directory, if we have one.
if (!M.BaseDirectory.empty())
return ResolveImportedPath(Filename, M.BaseDirectory);
}
void ASTReader::ResolveImportedPath(std::string &Filename, StringRef Prefix) {
if (Filename.empty() || llvm::sys::path::is_absolute(Filename))
return;
SmallString<128> Buffer;
llvm::sys::path::append(Buffer, Prefix, Filename);
Filename.assign(Buffer.begin(), Buffer.end());
}
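/// Whether the given load result is diagnosed rather than silently tolerated:
/// hard failures and errors always are; the recoverable kinds only when the
/// caller lacks the corresponding ARR_* capability.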
static bool isDiagnosedResult(ASTReader::ASTReadResult ARR, unsigned Caps) {
switch (ARR) {
case ASTReader::Failure: return true;
case ASTReader::Missing: return !(Caps & ASTReader::ARR_Missing);
case ASTReader::OutOfDate: return !(Caps & ASTReader::ARR_OutOfDate);
case ASTReader::VersionMismatch: return !(Caps & ASTReader::ARR_VersionMismatch);
case ASTReader::ConfigurationMismatch:
return !(Caps & ASTReader::ARR_ConfigurationMismatch);
case ASTReader::HadErrors: return true;
case ASTReader::Success: return false;
}
llvm_unreachable("unknown ASTReadResult");
}
ASTReader::ASTReadResult ASTReader::ReadOptionsBlock(
BitstreamCursor &Stream, unsigned ClientLoadCapabilities,
bool AllowCompatibleConfigurationMismatch, ASTReaderListener &Listener,
std::string &SuggestedPredefines) {
if (llvm::Error Err = Stream.EnterSubBlock(OPTIONS_BLOCK_ID)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return Failure;
}
// Read all of the records in the options block.
RecordData Record;
ASTReadResult Result = Success;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::SubBlock:
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Result;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
Expected<unsigned> MaybeRecordType = Stream.readRecord(Entry.ID, Record);
if (!MaybeRecordType) {
// FIXME this drops errors on the floor.
consumeError(MaybeRecordType.takeError());
return Failure;
}
switch ((OptionsRecordTypes)MaybeRecordType.get()) {
case LANGUAGE_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (ParseLanguageOptions(Record, Complain, Listener,
AllowCompatibleConfigurationMismatch))
Result = ConfigurationMismatch;
break;
}
case TARGET_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (ParseTargetOptions(Record, Complain, Listener,
AllowCompatibleConfigurationMismatch))
Result = ConfigurationMismatch;
break;
}
case FILE_SYSTEM_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParseFileSystemOptions(Record, Complain, Listener))
Result = ConfigurationMismatch;
break;
}
case HEADER_SEARCH_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParseHeaderSearchOptions(Record, Complain, Listener))
Result = ConfigurationMismatch;
break;
}
case PREPROCESSOR_OPTIONS:
bool Complain = (ClientLoadCapabilities & ARR_ConfigurationMismatch) == 0;
if (!AllowCompatibleConfigurationMismatch &&
ParsePreprocessorOptions(Record, Complain, Listener,
SuggestedPredefines))
Result = ConfigurationMismatch;
break;
}
}
}
ASTReader::ASTReadResult
ASTReader::ReadControlBlock(ModuleFile &F,
SmallVectorImpl<ImportedModule> &Loaded,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
if (llvm::Error Err = Stream.EnterSubBlock(CONTROL_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
// Lambda to read the unhashed control block the first time it's called.
//
// For PCM files, the unhashed control block cannot be read until after the
// MODULE_NAME record. However, PCH files have no MODULE_NAME, and yet still
// need to look ahead before reading the IMPORTS record. For consistency,
// this block is always read somehow (see BitstreamEntry::EndBlock).
bool HasReadUnhashedControlBlock = false;
auto readUnhashedControlBlockOnce = [&]() {
if (!HasReadUnhashedControlBlock) {
HasReadUnhashedControlBlock = true;
if (ASTReadResult Result =
readUnhashedControlBlock(F, ImportedBy, ClientLoadCapabilities))
return Result;
}
return Success;
};
bool DisableValidation = shouldDisableValidationForFile(F);
// Read all of the records and blocks in the control block.
RecordData Record;
unsigned NumInputs = 0;
unsigned NumUserInputs = 0;
StringRef BaseDirectoryAsWritten;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock: {
// Validate the module before returning. This call catches an AST with
// no module name and no imports.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
// Validate input files.
const HeaderSearchOptions &HSOpts =
PP.getHeaderSearchInfo().getHeaderSearchOpts();
// All user input files reside at the index range [0, NumUserInputs), and
// system input files reside at [NumUserInputs, NumInputs). For explicitly
// loaded module files, ignore missing inputs.
if (!DisableValidation && F.Kind != MK_ExplicitModule &&
F.Kind != MK_PrebuiltModule) {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
// If we are reading a module, we will create a verification timestamp,
// so we verify all input files. Otherwise, verify only user input
// files.
unsigned N = NumUserInputs;
if (ValidateSystemInputs ||
(HSOpts.ModulesValidateOncePerBuildSession &&
F.InputFilesValidationTimestamp <= HSOpts.BuildSessionTimestamp &&
F.Kind == MK_ImplicitModule))
N = NumInputs;
for (unsigned I = 0; I < N; ++I) {
InputFile IF = getInputFile(F, I+1, Complain);
if (!IF.getFile() || IF.isOutOfDate())
return OutOfDate;
}
}
if (Listener)
Listener->visitModuleFile(F.FileName, F.Kind);
if (Listener && Listener->needsInputFileVisitation()) {
unsigned N = Listener->needsSystemInputFileVisitation() ? NumInputs
: NumUserInputs;
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFileInfo FI = readInputFileInfo(F, I+1);
Listener->visitInputFile(FI.Filename, IsSystem, FI.Overridden,
F.Kind == MK_ExplicitModule ||
F.Kind == MK_PrebuiltModule);
}
}
return Success;
}
case llvm::BitstreamEntry::SubBlock:
switch (Entry.ID) {
case INPUT_FILES_BLOCK_ID:
F.InputFilesCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.InputFilesCursor, INPUT_FILES_BLOCK_ID)) {
Error("malformed block record in AST file");
return Failure;
}
continue;
case OPTIONS_BLOCK_ID:
// If we're reading the first module for this group, check its options
// are compatible with ours. For modules it imports, no further checking
// is required, because we checked them when we built it.
if (Listener && !ImportedBy) {
// Should we allow the configuration of the module file to differ from
// the configuration of the current translation unit in a compatible
// way?
//
// FIXME: Allow this for files explicitly specified with -include-pch.
bool AllowCompatibleConfigurationMismatch =
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule;
ASTReadResult Result =
ReadOptionsBlock(Stream, ClientLoadCapabilities,
AllowCompatibleConfigurationMismatch, *Listener,
SuggestedPredefines);
if (Result == Failure) {
Error("malformed block record in AST file");
return Result;
}
if (DisableValidation ||
(AllowConfigurationMismatch && Result == ConfigurationMismatch))
Result = Success;
// If we can't load the module, exit early since we likely
// will rebuild the module anyway. The stream may be in the
// middle of a block.
if (Result != Success)
return Result;
} else if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
}
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecordType =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecordType) {
Error(MaybeRecordType.takeError());
return Failure;
}
switch ((ControlRecordTypes)MaybeRecordType.get()) {
case METADATA: {
if (Record[0] != VERSION_MAJOR && !DisableValidation) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(Record[0] < VERSION_MAJOR? diag::err_pch_version_too_old
: diag::err_pch_version_too_new);
return VersionMismatch;
}
bool hasErrors = Record[6];
if (hasErrors && !DisableValidation) {
// If requested by the caller and the module hasn't already been read
// or compiled, mark modules on error as out-of-date.
if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) &&
canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
return OutOfDate;
if (!AllowASTWithCompilerErrors) {
Diag(diag::err_pch_with_compiler_errors);
return HadErrors;
}
}
if (hasErrors) {
Diags.ErrorOccurred = true;
Diags.UncompilableErrorOccurred = true;
Diags.UnrecoverableErrorOccurred = true;
}
F.RelocatablePCH = Record[4];
// Relative paths in a relocatable PCH are relative to our sysroot.
if (F.RelocatablePCH)
F.BaseDirectory = isysroot.empty() ? "/" : isysroot;
F.HasTimestamps = Record[5];
const std::string &CurBranch = getClangFullRepositoryVersion();
StringRef ASTBranch = Blob;
if (StringRef(CurBranch) != ASTBranch && !DisableValidation) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(diag::err_pch_different_branch) << ASTBranch << CurBranch;
return VersionMismatch;
}
break;
}
case IMPORTS: {
// Validate the AST before processing any imports (otherwise, untangling
// them can be error-prone and expensive). A module will have a name and
// will already have been validated, but this catches the PCH case.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
// Load each of the imported PCH files.
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
// The import location will be the local one for now; we will adjust
// all import locations of module imports after the global source
// location info is set up, in ReadAST.
SourceLocation ImportLoc =
ReadUntranslatedSourceLocation(Record[Idx++]);
off_t StoredSize = (off_t)Record[Idx++];
time_t StoredModTime = (time_t)Record[Idx++];
auto FirstSignatureByte = Record.begin() + Idx;
ASTFileSignature StoredSignature = ASTFileSignature::create(
FirstSignatureByte, FirstSignatureByte + ASTFileSignature::size);
Idx += ASTFileSignature::size;
std::string ImportedName = ReadString(Record, Idx);
std::string ImportedFile;
// For prebuilt and explicit modules first consult the file map for
// an override. Note that here we don't search prebuilt module
// directories, only the explicit name to file mappings. Also, we will
// still verify the size/signature making sure it is essentially the
// same file but perhaps in a different location.
if (ImportedKind == MK_PrebuiltModule || ImportedKind == MK_ExplicitModule)
ImportedFile = PP.getHeaderSearchInfo().getPrebuiltModuleFileName(
ImportedName, /*FileMapOnly*/ true);
if (ImportedFile.empty())
// Use BaseDirectoryAsWritten to ensure we use the same path in the
// ModuleCache as when writing.
ImportedFile = ReadPath(BaseDirectoryAsWritten, Record, Idx);
else
SkipPath(Record, Idx);
// If our client can't cope with us being out of date, we can't cope with
// our dependency being missing.
unsigned Capabilities = ClientLoadCapabilities;
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Capabilities &= ~ARR_Missing;
// Load the AST file.
auto Result = ReadASTCore(ImportedFile, ImportedKind, ImportLoc, &F,
Loaded, StoredSize, StoredModTime,
StoredSignature, Capabilities);
// If we diagnosed a problem, produce a backtrace.
bool recompilingFinalized =
Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
getModuleManager().getModuleCache().isPCMFinal(F.FileName);
if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
Diag(diag::note_module_file_imported_by)
<< F.FileName << !F.ModuleName.empty() << F.ModuleName;
if (recompilingFinalized)
Diag(diag::note_module_file_conflict);
switch (Result) {
case Failure: return Failure;
// If we have to ignore the dependency, we'll have to ignore this too.
case Missing:
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
case Success: break;
}
}
break;
}
case ORIGINAL_FILE:
F.OriginalSourceFileID = FileID::get(Record[0]);
F.ActualOriginalSourceFileName = std::string(Blob);
F.OriginalSourceFileName = F.ActualOriginalSourceFileName;
ResolveImportedPath(F, F.OriginalSourceFileName);
break;
case ORIGINAL_FILE_ID:
F.OriginalSourceFileID = FileID::get(Record[0]);
break;
case ORIGINAL_PCH_DIR:
F.OriginalDir = std::string(Blob);
break;
case MODULE_NAME:
F.ModuleName = std::string(Blob);
Diag(diag::remark_module_import)
<< F.ModuleName << F.FileName << (ImportedBy ? true : false)
<< (ImportedBy ? StringRef(ImportedBy->ModuleName) : StringRef());
if (Listener)
Listener->ReadModuleName(F.ModuleName);
// Validate the AST as soon as we have a name so we can exit early on
// failure.
if (ASTReadResult Result = readUnhashedControlBlockOnce())
return Result;
break;
case MODULE_DIRECTORY: {
// Save the BaseDirectory as written in the PCM for computing the module
// filename for the ModuleCache.
BaseDirectoryAsWritten = Blob;
assert(!F.ModuleName.empty() &&
"MODULE_DIRECTORY found before MODULE_NAME");
// If we've already loaded a module map file covering this module, we may
// have a better path for it (relative to the current build).
Module *M = PP.getHeaderSearchInfo().lookupModule(
F.ModuleName, /*AllowSearch*/ true,
/*AllowExtraModuleMapSearch*/ true);
if (M && M->Directory) {
// If we're implicitly loading a module, the base directory can't
// change between the build and use.
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
F.Kind != MK_ExplicitModule && F.Kind != MK_PrebuiltModule) {
auto BuildDir = PP.getFileManager().getDirectory(Blob);
if (!BuildDir || *BuildDir != M->Directory) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_relocated)
<< F.ModuleName << Blob << M->Directory->getName();
return OutOfDate;
}
}
F.BaseDirectory = std::string(M->Directory->getName());
} else {
F.BaseDirectory = std::string(Blob);
}
break;
}
case MODULE_MAP_FILE:
if (ASTReadResult Result =
ReadModuleMapFileBlock(Record, F, ImportedBy, ClientLoadCapabilities))
return Result;
break;
case INPUT_FILE_OFFSETS:
NumInputs = Record[0];
NumUserInputs = Record[1];
F.InputFileOffsets =
(const llvm::support::unaligned_uint64_t *)Blob.data();
F.InputFilesLoaded.resize(NumInputs);
F.NumUserInputFiles = NumUserInputs;
break;
}
}
}
ASTReader::ASTReadResult
ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
F.ASTBlockStartOffset = Stream.GetCurrentBitNo();
// Read all of the records and blocks for the AST file.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
Error("error at end of module block in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock:
// Outside of C++, we do not store a lookup map for the translation unit.
// Instead, mark it as needing a lookup map to be built if this module
// contains any declarations lexically within it (which it always does!).
// This usually has no cost, since we very rarely need the lookup map for
// the translation unit outside C++.
if (ASTContext *Ctx = ContextObj) {
DeclContext *DC = Ctx->getTranslationUnitDecl();
if (DC->hasExternalLexicalStorage() && !Ctx->getLangOpts().CPlusPlus)
DC->setMustBuildLookupTable();
}
return Success;
case llvm::BitstreamEntry::SubBlock:
switch (Entry.ID) {
case DECLTYPES_BLOCK_ID:
// We lazily load the decls block, but we want to set up the
// DeclsCursor cursor to point into it. Clone our current bitcode
// cursor to it, enter the block and read the abbrevs in that block.
// With the main cursor, we just skip over it.
F.DeclsCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID,
&F.DeclsBlockStartOffset)) {
Error("malformed block record in AST file");
return Failure;
}
break;
case PREPROCESSOR_BLOCK_ID:
F.MacroCursor = Stream;
if (!PP.getExternalSource())
PP.setExternalSource(this);
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.MacroCursor, PREPROCESSOR_BLOCK_ID)) {
Error("malformed block record in AST file");
return Failure;
}
F.MacroStartOffset = F.MacroCursor.GetCurrentBitNo();
break;
case PREPROCESSOR_DETAIL_BLOCK_ID:
F.PreprocessorDetailCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(F.PreprocessorDetailCursor,
PREPROCESSOR_DETAIL_BLOCK_ID)) {
Error("malformed preprocessor detail record in AST file");
return Failure;
}
F.PreprocessorDetailStartOffset
= F.PreprocessorDetailCursor.GetCurrentBitNo();
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
break;
case SOURCE_MANAGER_BLOCK_ID:
if (ReadSourceManagerBlock(F))
return Failure;
break;
case SUBMODULE_BLOCK_ID:
if (ASTReadResult Result =
ReadSubmoduleBlock(F, ClientLoadCapabilities))
return Result;
break;
case COMMENTS_BLOCK_ID: {
BitstreamCursor C = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
if (ReadBlockAbbrevs(C, COMMENTS_BLOCK_ID)) {
Error("malformed comments block in AST file");
return Failure;
}
CommentsCursors.push_back(std::make_pair(C, &F));
break;
}
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
break;
}
continue;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecordType =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecordType) {
Error(MaybeRecordType.takeError());
return Failure;
}
ASTRecordTypes RecordType = (ASTRecordTypes)MaybeRecordType.get();
// If we're not loading an AST context, we don't care about most records.
if (!ContextObj) {
switch (RecordType) {
case IDENTIFIER_TABLE:
case IDENTIFIER_OFFSET:
case INTERESTING_IDENTIFIERS:
case STATISTICS:
case PP_CONDITIONAL_STACK:
case PP_COUNTER_VALUE:
case SOURCE_LOCATION_OFFSETS:
case MODULE_OFFSET_MAP:
case SOURCE_MANAGER_LINE_TABLE:
case SOURCE_LOCATION_PRELOADS:
case PPD_ENTITIES_OFFSETS:
case HEADER_SEARCH_TABLE:
case IMPORTED_MODULES:
case MACRO_OFFSET:
break;
default:
continue;
}
}
switch (RecordType) {
default: // Default behavior: ignore.
break;
case TYPE_OFFSET: {
if (F.LocalNumTypes != 0) {
Error("duplicate TYPE_OFFSET record in AST file");
return Failure;
}
F.TypeOffsets = reinterpret_cast<const UnderalignedInt64 *>(Blob.data());
F.LocalNumTypes = Record[0];
unsigned LocalBaseTypeIndex = Record[1];
F.BaseTypeIndex = getTotalNumTypes();
if (F.LocalNumTypes > 0) {
// Introduce the global -> local mapping for types within this module.
GlobalTypeMap.insert(std::make_pair(getTotalNumTypes(), &F));
// Introduce the local -> global mapping for types within this module.
F.TypeRemap.insertOrReplace(
std::make_pair(LocalBaseTypeIndex,
F.BaseTypeIndex - LocalBaseTypeIndex));
TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes);
}
break;
}
case DECL_OFFSET: {
if (F.LocalNumDecls != 0) {
Error("duplicate DECL_OFFSET record in AST file");
return Failure;
}
F.DeclOffsets = (const DeclOffset *)Blob.data();
F.LocalNumDecls = Record[0];
unsigned LocalBaseDeclID = Record[1];
F.BaseDeclID = getTotalNumDecls();
if (F.LocalNumDecls > 0) {
// Introduce the global -> local mapping for declarations within this
// module.
GlobalDeclMap.insert(
std::make_pair(getTotalNumDecls() + NUM_PREDEF_DECL_IDS, &F));
// Introduce the local -> global mapping for declarations within this
// module.
F.DeclRemap.insertOrReplace(
std::make_pair(LocalBaseDeclID, F.BaseDeclID - LocalBaseDeclID));
// Introduce the global -> local mapping for declarations within this
// module.
F.GlobalToLocalDeclIDs[&F] = LocalBaseDeclID;
DeclsLoaded.resize(DeclsLoaded.size() + F.LocalNumDecls);
}
break;
}
case TU_UPDATE_LEXICAL: {
DeclContext *TU = ContextObj->getTranslationUnitDecl();
LexicalContents Contents(
reinterpret_cast<const llvm::support::unaligned_uint32_t *>(
Blob.data()),
static_cast<unsigned int>(Blob.size() / 4));
TULexicalDecls.push_back(std::make_pair(&F, Contents));
TU->setHasExternalLexicalStorage(true);
break;
}
case UPDATE_VISIBLE: {
unsigned Idx = 0;
serialization::DeclID ID = ReadDeclID(F, Record, Idx);
auto *Data = (const unsigned char*)Blob.data();
PendingVisibleUpdates[ID].push_back(PendingVisibleUpdate{&F, Data});
// If we've already loaded the decl, perform the updates when we finish
// loading this block.
if (Decl *D = GetExistingDecl(ID))
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
break;
}
case IDENTIFIER_TABLE:
F.IdentifierTableData =
reinterpret_cast<const unsigned char *>(Blob.data());
if (Record[0]) {
F.IdentifierLookupTable = ASTIdentifierLookupTable::Create(
F.IdentifierTableData + Record[0],
F.IdentifierTableData + sizeof(uint32_t),
F.IdentifierTableData,
ASTIdentifierLookupTrait(*this, F));
PP.getIdentifierTable().setExternalIdentifierLookup(this);
}
break;
case IDENTIFIER_OFFSET: {
if (F.LocalNumIdentifiers != 0) {
Error("duplicate IDENTIFIER_OFFSET record in AST file");
return Failure;
}
F.IdentifierOffsets = (const uint32_t *)Blob.data();
F.LocalNumIdentifiers = Record[0];
unsigned LocalBaseIdentifierID = Record[1];
F.BaseIdentifierID = getTotalNumIdentifiers();
if (F.LocalNumIdentifiers > 0) {
// Introduce the global -> local mapping for identifiers within this
// module.
GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1,
&F));
// Introduce the local -> global mapping for identifiers within this
// module.
F.IdentifierRemap.insertOrReplace(
std::make_pair(LocalBaseIdentifierID,
F.BaseIdentifierID - LocalBaseIdentifierID));
IdentifiersLoaded.resize(IdentifiersLoaded.size()
+ F.LocalNumIdentifiers);
}
break;
}
case INTERESTING_IDENTIFIERS:
F.PreloadIdentifierOffsets.assign(Record.begin(), Record.end());
break;
case EAGERLY_DESERIALIZED_DECLS:
// FIXME: Skip reading this record if our ASTConsumer doesn't care
// about "interesting" decls (for instance, if we're building a module).
for (unsigned I = 0, N = Record.size(); I != N; ++I)
EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case MODULAR_CODEGEN_DECLS:
// FIXME: Skip reading this record if our ASTConsumer doesn't care about
// them (ie: if we're not codegenerating this module).
if (F.Kind == MK_MainFile ||
getContext().getLangOpts().BuildingPCHWithObjectFile)
for (unsigned I = 0, N = Record.size(); I != N; ++I)
EagerlyDeserializedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case SPECIAL_TYPES:
if (SpecialTypes.empty()) {
for (unsigned I = 0, N = Record.size(); I != N; ++I)
SpecialTypes.push_back(getGlobalTypeID(F, Record[I]));
break;
}
if (SpecialTypes.size() != Record.size()) {
Error("invalid special-types record");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
serialization::TypeID ID = getGlobalTypeID(F, Record[I]);
if (!SpecialTypes[I])
SpecialTypes[I] = ID;
// FIXME: If ID && SpecialTypes[I] != ID, do we need a separate
// merge step?
}
break;
case STATISTICS:
TotalNumStatements += Record[0];
TotalNumMacros += Record[1];
TotalLexicalDeclContexts += Record[2];
TotalVisibleDeclContexts += Record[3];
break;
case UNUSED_FILESCOPED_DECLS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
UnusedFileScopedDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case DELEGATING_CTORS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
DelegatingCtorDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case WEAK_UNDECLARED_IDENTIFIERS:
if (Record.size() % 4 != 0) {
Error("invalid weak identifiers record");
return Failure;
}
// FIXME: Ignore weak undeclared identifiers from non-original PCH
// files. This isn't the way to do it :)
WeakUndeclaredIdentifiers.clear();
// Translate the weak, undeclared identifiers into global IDs.
for (unsigned I = 0, N = Record.size(); I < N; /* in loop */) {
WeakUndeclaredIdentifiers.push_back(
getGlobalIdentifierID(F, Record[I++]));
WeakUndeclaredIdentifiers.push_back(
getGlobalIdentifierID(F, Record[I++]));
WeakUndeclaredIdentifiers.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
WeakUndeclaredIdentifiers.push_back(Record[I++]);
}
break;
case SELECTOR_OFFSETS: {
F.SelectorOffsets = (const uint32_t *)Blob.data();
F.LocalNumSelectors = Record[0];
unsigned LocalBaseSelectorID = Record[1];
F.BaseSelectorID = getTotalNumSelectors();
if (F.LocalNumSelectors > 0) {
// Introduce the global -> local mapping for selectors within this
// module.
GlobalSelectorMap.insert(std::make_pair(getTotalNumSelectors()+1, &F));
// Introduce the local -> global mapping for selectors within this
// module.
F.SelectorRemap.insertOrReplace(
std::make_pair(LocalBaseSelectorID,
F.BaseSelectorID - LocalBaseSelectorID));
SelectorsLoaded.resize(SelectorsLoaded.size() + F.LocalNumSelectors);
}
break;
}
case METHOD_POOL:
F.SelectorLookupTableData = (const unsigned char *)Blob.data();
if (Record[0])
F.SelectorLookupTable
= ASTSelectorLookupTable::Create(
F.SelectorLookupTableData + Record[0],
F.SelectorLookupTableData,
ASTSelectorLookupTrait(*this, F));
TotalNumMethodPoolEntries += Record[1];
break;
case REFERENCED_SELECTOR_POOL:
if (!Record.empty()) {
for (unsigned Idx = 0, N = Record.size() - 1; Idx < N; /* in loop */) {
ReferencedSelectorsData.push_back(getGlobalSelectorID(F,
Record[Idx++]));
ReferencedSelectorsData.push_back(ReadSourceLocation(F, Record, Idx).
getRawEncoding());
}
}
break;
case PP_CONDITIONAL_STACK:
if (!Record.empty()) {
unsigned Idx = 0, End = Record.size() - 1;
bool ReachedEOFWhileSkipping = Record[Idx++];
llvm::Optional<Preprocessor::PreambleSkipInfo> SkipInfo;
if (ReachedEOFWhileSkipping) {
SourceLocation HashToken = ReadSourceLocation(F, Record, Idx);
SourceLocation IfTokenLoc = ReadSourceLocation(F, Record, Idx);
bool FoundNonSkipPortion = Record[Idx++];
bool FoundElse = Record[Idx++];
SourceLocation ElseLoc = ReadSourceLocation(F, Record, Idx);
SkipInfo.emplace(HashToken, IfTokenLoc, FoundNonSkipPortion,
FoundElse, ElseLoc);
}
SmallVector<PPConditionalInfo, 4> ConditionalStack;
while (Idx < End) {
auto Loc = ReadSourceLocation(F, Record, Idx);
bool WasSkipping = Record[Idx++];
bool FoundNonSkip = Record[Idx++];
bool FoundElse = Record[Idx++];
ConditionalStack.push_back(
{Loc, WasSkipping, FoundNonSkip, FoundElse});
}
PP.setReplayablePreambleConditionalStack(ConditionalStack, SkipInfo);
}
break;
case PP_COUNTER_VALUE:
if (!Record.empty() && Listener)
Listener->ReadCounter(F, Record[0]);
break;
case FILE_SORTED_DECLS:
F.FileSortedDecls = (const DeclID *)Blob.data();
F.NumFileSortedDecls = Record[0];
break;
case SOURCE_LOCATION_OFFSETS: {
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
F.LocalNumSLocEntries = Record[0];
SourceLocation::UIntTy SLocSpaceSize = Record[1];
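// Record[2] holds the base of the entry offsets, stored relative to the
// start of the source-manager block.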
F.SLocEntryOffsetsBase = Record[2] + F.SourceManagerBlockStartOffset;
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
if (!F.SLocEntryBaseID) {
Error("ran out of source locations");
break;
}
// Make our entry in the range map. BaseID is negative and growing, so
// we invert it. Because we invert it, though, we need the other end of
// the range.
unsigned RangeStart =
unsigned(-F.SLocEntryBaseID) - F.LocalNumSLocEntries + 1;
GlobalSLocEntryMap.insert(std::make_pair(RangeStart, &F));
F.FirstLoc = SourceLocation::getFromRawEncoding(F.SLocEntryBaseOffset);
// SLocEntryBaseOffset is lower than MaxLoadedOffset and decreasing.
assert((F.SLocEntryBaseOffset & SourceLocation::MacroIDBit) == 0);
GlobalSLocOffsetMap.insert(
std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset
- SLocSpaceSize,&F));
// Initialize the remapping table.
// Invalid stays invalid.
F.SLocRemap.insertOrReplace(std::make_pair(0U, 0));
// This module. Base was 2 when being compiled.
F.SLocRemap.insertOrReplace(std::make_pair(
2U, static_cast<SourceLocation::IntTy>(F.SLocEntryBaseOffset - 2)));
TotalNumSLocEntries += F.LocalNumSLocEntries;
break;
}
case MODULE_OFFSET_MAP:
F.ModuleOffsetMap = Blob;
break;
case SOURCE_MANAGER_LINE_TABLE:
if (ParseLineTable(F, Record)) {
Error("malformed SOURCE_MANAGER_LINE_TABLE in AST file");
return Failure;
}
break;
case SOURCE_LOCATION_PRELOADS: {
// Need to transform from the local view (1-based IDs) to the global view,
// which is based off F.SLocEntryBaseID.
if (!F.PreloadSLocEntries.empty()) {
Error("Multiple SOURCE_LOCATION_PRELOADS records in AST file");
return Failure;
}
F.PreloadSLocEntries.swap(Record);
break;
}
case EXT_VECTOR_DECLS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
ExtVectorDecls.push_back(getGlobalDeclID(F, Record[I]));
break;
case VTABLE_USES:
if (Record.size() % 3 != 0) {
Error("Invalid VTABLE_USES record");
return Failure;
}
// Later tables overwrite earlier ones.
// FIXME: Modules will have some trouble with this. This is clearly not
// the right way to do this.
VTableUses.clear();
for (unsigned Idx = 0, N = Record.size(); Idx != N; /* In loop */) {
VTableUses.push_back(getGlobalDeclID(F, Record[Idx++]));
VTableUses.push_back(
ReadSourceLocation(F, Record, Idx).getRawEncoding());
VTableUses.push_back(Record[Idx++]);
}
break;
case PENDING_IMPLICIT_INSTANTIATIONS:
if (PendingInstantiations.size() % 2 != 0) {
Error("Invalid existing PendingInstantiations");
return Failure;
}
if (Record.size() % 2 != 0) {
Error("Invalid PENDING_IMPLICIT_INSTANTIATIONS block");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
PendingInstantiations.push_back(getGlobalDeclID(F, Record[I++]));
PendingInstantiations.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
}
break;
case SEMA_DECL_REFS:
if (Record.size() != 3) {
Error("Invalid SEMA_DECL_REFS block");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; ++I)
SemaDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
break;
case PPD_ENTITIES_OFFSETS: {
F.PreprocessedEntityOffsets = (const PPEntityOffset *)Blob.data();
assert(Blob.size() % sizeof(PPEntityOffset) == 0);
F.NumPreprocessedEntities = Blob.size() / sizeof(PPEntityOffset);
unsigned LocalBasePreprocessedEntityID = Record[0];
unsigned StartingID;
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
StartingID
= PP.getPreprocessingRecord()
->allocateLoadedEntities(F.NumPreprocessedEntities);
F.BasePreprocessedEntityID = StartingID;
if (F.NumPreprocessedEntities > 0) {
// Introduce the global -> local mapping for preprocessed entities in
// this module.
GlobalPreprocessedEntityMap.insert(std::make_pair(StartingID, &F));
// Introduce the local -> global mapping for preprocessed entities in
// this module.
F.PreprocessedEntityRemap.insertOrReplace(
std::make_pair(LocalBasePreprocessedEntityID,
F.BasePreprocessedEntityID - LocalBasePreprocessedEntityID));
}
break;
}
case PPD_SKIPPED_RANGES: {
F.PreprocessedSkippedRangeOffsets = (const PPSkippedRange*)Blob.data();
assert(Blob.size() % sizeof(PPSkippedRange) == 0);
F.NumPreprocessedSkippedRanges = Blob.size() / sizeof(PPSkippedRange);
if (!PP.getPreprocessingRecord())
PP.createPreprocessingRecord();
if (!PP.getPreprocessingRecord()->getExternalSource())
PP.getPreprocessingRecord()->SetExternalSource(*this);
F.BasePreprocessedSkippedRangeID = PP.getPreprocessingRecord()
->allocateSkippedRanges(F.NumPreprocessedSkippedRanges);
if (F.NumPreprocessedSkippedRanges > 0)
GlobalSkippedRangeMap.insert(
std::make_pair(F.BasePreprocessedSkippedRangeID, &F));
break;
}
case DECL_UPDATE_OFFSETS:
if (Record.size() % 2 != 0) {
Error("invalid DECL_UPDATE_OFFSETS block in AST file");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; I += 2) {
GlobalDeclID ID = getGlobalDeclID(F, Record[I]);
DeclUpdateOffsets[ID].push_back(std::make_pair(&F, Record[I + 1]));
// If we've already loaded the decl, perform the updates when we finish
// loading this block.
if (Decl *D = GetExistingDecl(ID))
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/false));
}
break;
case OBJC_CATEGORIES_MAP:
if (F.LocalNumObjCCategoriesInMap != 0) {
Error("duplicate OBJC_CATEGORIES_MAP record in AST file");
return Failure;
}
F.LocalNumObjCCategoriesInMap = Record[0];
F.ObjCCategoriesMap = (const ObjCCategoriesInfo *)Blob.data();
break;
case OBJC_CATEGORIES:
F.ObjCCategories.swap(Record);
break;
case CUDA_SPECIAL_DECL_REFS:
// Later tables overwrite earlier ones.
// FIXME: Modules will have trouble with this.
CUDASpecialDeclRefs.clear();
for (unsigned I = 0, N = Record.size(); I != N; ++I)
CUDASpecialDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
break;
case HEADER_SEARCH_TABLE:
F.HeaderFileInfoTableData = Blob.data();
F.LocalNumHeaderFileInfos = Record[1];
if (Record[0]) {
F.HeaderFileInfoTable
= HeaderFileInfoLookupTable::Create(
(const unsigned char *)F.HeaderFileInfoTableData + Record[0],
(const unsigned char *)F.HeaderFileInfoTableData,
HeaderFileInfoTrait(*this, F,
&PP.getHeaderSearchInfo(),
Blob.data() + Record[2]));
PP.getHeaderSearchInfo().SetExternalSource(this);
if (!PP.getHeaderSearchInfo().getExternalLookup())
PP.getHeaderSearchInfo().SetExternalLookup(this);
}
break;
case FP_PRAGMA_OPTIONS:
// Later tables overwrite earlier ones.
FPPragmaOptions.swap(Record);
break;
case OPENCL_EXTENSIONS:
for (unsigned I = 0, E = Record.size(); I != E; ) {
auto Name = ReadString(Record, I);
auto &OptInfo = OpenCLExtensions.OptMap[Name];
OptInfo.Supported = Record[I++] != 0;
OptInfo.Enabled = Record[I++] != 0;
OptInfo.WithPragma = Record[I++] != 0;
OptInfo.Avail = Record[I++];
OptInfo.Core = Record[I++];
OptInfo.Opt = Record[I++];
}
break;
case TENTATIVE_DEFINITIONS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
TentativeDefinitions.push_back(getGlobalDeclID(F, Record[I]));
break;
case KNOWN_NAMESPACES:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
KnownNamespaces.push_back(getGlobalDeclID(F, Record[I]));
break;
case UNDEFINED_BUT_USED:
if (UndefinedButUsed.size() % 2 != 0) {
Error("Invalid existing UndefinedButUsed");
return Failure;
}
if (Record.size() % 2 != 0) {
Error("invalid undefined-but-used record");
return Failure;
}
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
UndefinedButUsed.push_back(getGlobalDeclID(F, Record[I++]));
UndefinedButUsed.push_back(
ReadSourceLocation(F, Record, I).getRawEncoding());
}
break;
case DELETE_EXPRS_TO_ANALYZE:
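// Record layout: a declaration ID, a count, then <count>
// (SourceLocation, IsArrayForm) pairs; IDs and locations are translated
// to the global space as they are copied into DelayedDeleteExprs.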
for (unsigned I = 0, N = Record.size(); I != N;) {
DelayedDeleteExprs.push_back(getGlobalDeclID(F, Record[I++]));
const uint64_t Count = Record[I++];
DelayedDeleteExprs.push_back(Count);
for (uint64_t C = 0; C < Count; ++C) {
DelayedDeleteExprs.push_back(ReadSourceLocation(F, Record, I).getRawEncoding());
bool IsArrayForm = Record[I++] == 1;
DelayedDeleteExprs.push_back(IsArrayForm);
}
}
break;
case IMPORTED_MODULES:
if (!F.isModule()) {
// If we aren't loading a module (which has its own exports), make
// all of the imported modules visible.
// FIXME: Deal with macros-only imports.
for (unsigned I = 0, N = Record.size(); I != N; /**/) {
unsigned GlobalID = getGlobalSubmoduleID(F, Record[I++]);
SourceLocation Loc = ReadSourceLocation(F, Record, I);
if (GlobalID) {
ImportedModules.push_back(ImportedSubmodule(GlobalID, Loc));
if (DeserializationListener)
DeserializationListener->ModuleImportRead(GlobalID, Loc);
}
}
}
break;
case MACRO_OFFSET: {
if (F.LocalNumMacros != 0) {
Error("duplicate MACRO_OFFSET record in AST file");
return Failure;
}
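// Record layout: [0] number of macros local to this module, [1] the
// module's first local macro ID, [2] the offset of the macro table
// relative to the start of the AST block; the blob holds the per-macro
// offsets themselves.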
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset;
F.BaseMacroID = getTotalNumMacros();
if (F.LocalNumMacros > 0) {
// Introduce the global -> local mapping for macros within this module.
GlobalMacroMap.insert(std::make_pair(getTotalNumMacros() + 1, &F));
// Introduce the local -> global mapping for macros within this module.
F.MacroRemap.insertOrReplace(
std::make_pair(LocalBaseMacroID,
F.BaseMacroID - LocalBaseMacroID));
MacrosLoaded.resize(MacrosLoaded.size() + F.LocalNumMacros);
}
break;
}
case LATE_PARSED_TEMPLATE:
LateParsedTemplates.emplace_back(
std::piecewise_construct, std::forward_as_tuple(&F),
std::forward_as_tuple(Record.begin(), Record.end()));
break;
case OPTIMIZE_PRAGMA_OPTIONS:
if (Record.size() != 1) {
Error("invalid pragma optimize record");
return Failure;
}
OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
break;
case MSSTRUCT_PRAGMA_OPTIONS:
if (Record.size() != 1) {
Error("invalid pragma ms_struct record");
return Failure;
}
PragmaMSStructState = Record[0];
break;
case POINTERS_TO_MEMBERS_PRAGMA_OPTIONS:
if (Record.size() != 2) {
Error("invalid pragma ms_struct record");
return Failure;
}
PragmaMSPointersToMembersState = Record[0];
PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
break;
case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
UnusedLocalTypedefNameCandidates.push_back(
getGlobalDeclID(F, Record[I]));
break;
case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH:
if (Record.size() != 1) {
Error("invalid cuda pragma options record");
return Failure;
}
ForceCUDAHostDeviceDepth = Record[0];
break;
case ALIGN_PACK_PRAGMA_OPTIONS: {
if (Record.size() < 3) {
Error("invalid pragma pack record");
return Failure;
}
PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]);
PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
unsigned Idx = 3;
// Reset the stack when importing a new module.
PragmaAlignPackStack.clear();
for (unsigned I = 0; I < NumStackEntries; ++I) {
PragmaAlignPackStackEntry Entry;
Entry.Value = ReadAlignPackInfo(Record[Idx++]);
Entry.Location = ReadSourceLocation(F, Record[Idx++]);
Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
PragmaAlignPackStrings.push_back(ReadString(Record, Idx));
Entry.SlotLabel = PragmaAlignPackStrings.back();
PragmaAlignPackStack.push_back(Entry);
}
break;
}
case FLOAT_CONTROL_PRAGMA_OPTIONS: {
if (Record.size() < 3) {
Error("invalid pragma pack record");
return Failure;
}
FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]);
FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
unsigned Idx = 3;
// Reset the stack when importing a new module.
FpPragmaStack.clear();
for (unsigned I = 0; I < NumStackEntries; ++I) {
FpPragmaStackEntry Entry;
Entry.Value = FPOptionsOverride::getFromOpaqueInt(Record[Idx++]);
Entry.Location = ReadSourceLocation(F, Record[Idx++]);
Entry.PushLocation = ReadSourceLocation(F, Record[Idx++]);
FpPragmaStrings.push_back(ReadString(Record, Idx));
Entry.SlotLabel = FpPragmaStrings.back();
FpPragmaStack.push_back(Entry);
}
break;
}
case DECLS_TO_CHECK_FOR_DEFERRED_DIAGS:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
DeclsToCheckForDeferredDiags.insert(getGlobalDeclID(F, Record[I]));
break;
}
}
}
void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
assert(!F.ModuleOffsetMap.empty() && "no module offset map to read");
// Additional remapping information.
const unsigned char *Data = (const unsigned char*)F.ModuleOffsetMap.data();
const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size();
F.ModuleOffsetMap = StringRef();
// If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders.
if (F.SLocRemap.find(0) == F.SLocRemap.end()) {
F.SLocRemap.insert(std::make_pair(0U, 0));
F.SLocRemap.insert(std::make_pair(2U, 1));
}
// Continuous range maps we may be updating in our module.
using SLocRemapBuilder =
ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy,
2>::Builder;
using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
SLocRemapBuilder SLocRemap(F.SLocRemap);
RemapBuilder IdentifierRemap(F.IdentifierRemap);
RemapBuilder MacroRemap(F.MacroRemap);
RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
RemapBuilder SubmoduleRemap(F.SubmoduleRemap);
RemapBuilder SelectorRemap(F.SelectorRemap);
RemapBuilder DeclRemap(F.DeclRemap);
RemapBuilder TypeRemap(F.TypeRemap);
while (Data < DataEnd) {
// FIXME: Looking up dependency modules by filename is horrible. Let's
// start fixing this with prebuilt, explicit and implicit modules and see
// how it goes...
using namespace llvm::support;
ModuleKind Kind = static_cast<ModuleKind>(
endian::readNext<uint8_t, little, unaligned>(Data));
uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
StringRef Name = StringRef((const char*)Data, Len);
Data += Len;
ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
Kind == MK_ImplicitModule
? ModuleMgr.lookupByModuleName(Name)
: ModuleMgr.lookupByFileName(Name));
if (!OM) {
std::string Msg =
"SourceLocation remap refers to unknown module, cannot find ";
Msg.append(std::string(Name));
Error(Msg);
return;
}
SourceLocation::UIntTy SLocOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t IdentifierIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t MacroIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t PreprocessedEntityIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t SubmoduleIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t SelectorIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t DeclIDOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
uint32_t TypeIndexOffset =
endian::readNext<uint32_t, little, unaligned>(Data);
auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset,
RemapBuilder &Remap) {
constexpr uint32_t None = std::numeric_limits<uint32_t>::max();
if (Offset != None)
Remap.insert(std::make_pair(Offset,
static_cast<int>(BaseOffset - Offset)));
};
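// The remap stores a delta keyed by the first local offset it applies to:
// a stored local ID K with K >= Offset resolves to K + (BaseOffset - Offset).
// Illustration with hypothetical numbers: with Offset = 100 and
// BaseOffset = 5000, the entry (100, +4900) maps a stored ID of 103 to
// global ID 5003.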
constexpr SourceLocation::UIntTy SLocNone =
std::numeric_limits<SourceLocation::UIntTy>::max();
if (SLocOffset != SLocNone)
SLocRemap.insert(std::make_pair(
SLocOffset, static_cast<SourceLocation::IntTy>(
OM->SLocEntryBaseOffset - SLocOffset)));
mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
PreprocessedEntityRemap);
mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap);
mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap);
mapOffset(DeclIDOffset, OM->BaseDeclID, DeclRemap);
mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap);
// Global -> local mappings.
F.GlobalToLocalDeclIDs[OM] = DeclIDOffset;
}
}
ASTReader::ASTReadResult
ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities) {
unsigned Idx = 0;
F.ModuleMapPath = ReadPath(F, Record, Idx);
// Try to resolve ModuleName in the current header search context and
// verify that it is found in the same module map file as we saved. If the
// top-level AST file is a main file, skip this check because there is no
// usable header search context.
assert(!F.ModuleName.empty() &&
"MODULE_NAME should come before MODULE_MAP_FILE");
if (F.Kind == MK_ImplicitModule && ModuleMgr.begin()->Kind != MK_MainFile) {
// An implicitly-loaded module file should have its module listed in some
// module map file that we've already loaded.
Module *M = PP.getHeaderSearchInfo().lookupModule(F.ModuleName);
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
const FileEntry *ModMap = M ? Map.getModuleMapFileForUniquing(M) : nullptr;
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
!ModMap) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) {
if (auto ASTFE = M ? M->getASTFile() : None) {
// This module was defined by an imported (explicit) module.
Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName
<< ASTFE->getName();
} else {
// This module was built with a different module map.
Diag(diag::err_imported_module_not_found)
<< F.ModuleName << F.FileName
<< (ImportedBy ? ImportedBy->FileName : "") << F.ModuleMapPath
<< !ImportedBy;
// In case it was imported by a PCH, there's a chance the user is
// simply missing the search path to the directory containing
// the modulemap.
if (ImportedBy && ImportedBy->Kind == MK_PCH)
Diag(diag::note_imported_by_pch_module_not_found)
<< llvm::sys::path::parent_path(F.ModuleMapPath);
}
}
return OutOfDate;
}
assert(M && M->Name == F.ModuleName && "found module with different name");
// Check the primary module map file.
auto StoredModMap = FileMgr.getFile(F.ModuleMapPath);
if (!StoredModMap || *StoredModMap != ModMap) {
assert(ModMap && "found module is missing module map file");
assert((ImportedBy || F.Kind == MK_ImplicitModule) &&
"top-level import should be verified");
bool NotImported = F.Kind == MK_ImplicitModule && !ImportedBy;
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_imported_module_modmap_changed)
<< F.ModuleName << (NotImported ? F.FileName : ImportedBy->FileName)
<< ModMap->getName() << F.ModuleMapPath << NotImported;
return OutOfDate;
}
llvm::SmallPtrSet<const FileEntry *, 1> AdditionalStoredMaps;
for (unsigned I = 0, N = Record[Idx++]; I < N; ++I) {
// FIXME: we should use input files rather than storing names.
std::string Filename = ReadPath(F, Record, Idx);
auto SF = FileMgr.getFile(Filename, false, false);
if (!SF) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Error("could not find file '" + Filename +"' referenced by AST file");
return OutOfDate;
}
AdditionalStoredMaps.insert(*SF);
}
// Check any additional module map files (e.g. module.private.modulemap)
// that are not in the pcm.
if (auto *AdditionalModuleMaps = Map.getAdditionalModuleMapFiles(M)) {
for (const FileEntry *ModMap : *AdditionalModuleMaps) {
// Remove files that match
// Note: SmallPtrSet::erase is really remove
if (!AdditionalStoredMaps.erase(ModMap)) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*new*/0 << ModMap->getName();
return OutOfDate;
}
}
}
// Check any additional module map files that are in the pcm, but not
// found in header search. Cases that match are already removed.
for (const FileEntry *ModMap : AdditionalStoredMaps) {
if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
Diag(diag::err_module_different_modmap)
<< F.ModuleName << /*not new*/1 << ModMap->getName();
return OutOfDate;
}
}
if (Listener)
Listener->ReadModuleMapFile(F.ModuleMapPath);
return Success;
}
/// Move the given method to the back of the global list of methods.
static void moveMethodToBackOfGlobalList(Sema &S, ObjCMethodDecl *Method) {
// Find the entry for this selector in the method pool.
Sema::GlobalMethodPool::iterator Known
= S.MethodPool.find(Method->getSelector());
if (Known == S.MethodPool.end())
return;
// Retrieve the appropriate method list.
ObjCMethodList &Start = Method->isInstanceMethod()? Known->second.first
: Known->second.second;
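// The loop below rotates Method to the back of the list: once Method is
// found, each node takes over its successor's method and the final node is
// set to Method. Illustration with a hypothetical list [A, Method, B, C]:
// afterwards the list holds [A, B, C, Method].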
bool Found = false;
for (ObjCMethodList *List = &Start; List; List = List->getNext()) {
if (!Found) {
if (List->getMethod() == Method) {
Found = true;
} else {
// Keep searching.
continue;
}
}
if (List->getNext())
List->setMethod(List->getNext()->getMethod());
else
List->setMethod(Method);
}
}
void ASTReader::makeNamesVisible(const HiddenNames &Names, Module *Owner) {
assert(Owner->NameVisibility != Module::Hidden && "nothing to make visible?");
for (Decl *D : Names) {
bool wasHidden = !D->isUnconditionallyVisible();
D->setVisibleDespiteOwningModule();
if (wasHidden && SemaObj) {
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(D)) {
moveMethodToBackOfGlobalList(*SemaObj, Method);
}
}
}
}
void ASTReader::makeModuleVisible(Module *Mod,
Module::NameVisibilityKind NameVisibility,
SourceLocation ImportLoc) {
llvm::SmallPtrSet<Module *, 4> Visited;
SmallVector<Module *, 4> Stack;
Stack.push_back(Mod);
while (!Stack.empty()) {
Mod = Stack.pop_back_val();
if (NameVisibility <= Mod->NameVisibility) {
// This module already has this level of visibility (or greater), so
// there is nothing more to do.
continue;
}
if (Mod->isUnimportable()) {
// Modules that aren't importable cannot be made visible.
continue;
}
// Update the module's name visibility.
Mod->NameVisibility = NameVisibility;
// If we've already deserialized any names from this module,
// mark them as visible.
HiddenNamesMapType::iterator Hidden = HiddenNamesMap.find(Mod);
if (Hidden != HiddenNamesMap.end()) {
auto HiddenNames = std::move(*Hidden);
HiddenNamesMap.erase(Hidden);
makeNamesVisible(HiddenNames.second, HiddenNames.first);
assert(HiddenNamesMap.find(Mod) == HiddenNamesMap.end() &&
"making names visible added hidden names");
}
// Push any exported modules onto the stack to be marked as visible.
SmallVector<Module *, 16> Exports;
Mod->getExportedModules(Exports);
for (SmallVectorImpl<Module *>::iterator
I = Exports.begin(), E = Exports.end(); I != E; ++I) {
Module *Exported = *I;
if (Visited.insert(Exported).second)
Stack.push_back(Exported);
}
}
}
/// We've merged the definition \p MergedDef into the existing definition
/// \p Def. Ensure that \p Def is made visible whenever \p MergedDef is made
/// visible.
void ASTReader::mergeDefinitionVisibility(NamedDecl *Def,
NamedDecl *MergedDef) {
if (!Def->isUnconditionallyVisible()) {
// If MergedDef is visible or becomes visible, make the definition visible.
if (MergedDef->isUnconditionallyVisible())
Def->setVisibleDespiteOwningModule();
else {
getContext().mergeDefinitionIntoModule(
Def, MergedDef->getImportedOwningModule(),
/*NotifyListeners*/ false);
PendingMergedDefinitionsToDeduplicate.insert(Def);
}
}
}
bool ASTReader::loadGlobalIndex() {
if (GlobalIndex)
return false;
if (TriedLoadingGlobalIndex || !UseGlobalIndex ||
!PP.getLangOpts().Modules)
return true;
// Try to load the global index.
TriedLoadingGlobalIndex = true;
StringRef ModuleCachePath
= getPreprocessor().getHeaderSearchInfo().getModuleCachePath();
std::pair<GlobalModuleIndex *, llvm::Error> Result =
GlobalModuleIndex::readIndex(ModuleCachePath);
if (llvm::Error Err = std::move(Result.second)) {
assert(!Result.first);
consumeError(std::move(Err)); // FIXME this drops errors on the floor.
return true;
}
GlobalIndex.reset(Result.first);
ModuleMgr.setGlobalIndex(GlobalIndex.get());
return false;
}
bool ASTReader::isGlobalIndexUnavailable() const {
return PP.getLangOpts().Modules && UseGlobalIndex &&
!hasGlobalIndex() && TriedLoadingGlobalIndex;
}
static void updateModuleTimestamp(ModuleFile &MF) {
// Overwrite the timestamp file contents so that file's mtime changes.
std::string TimestampFilename = MF.getTimestampFilename();
std::error_code EC;
llvm::raw_fd_ostream OS(TimestampFilename, EC,
llvm::sys::fs::OF_TextWithCRLF);
if (EC)
return;
OS << "Timestamp file\n";
OS.close();
OS.clear_error(); // Avoid triggering a fatal error.
}
/// Given a cursor at the start of an AST file, scan ahead and drop the
/// cursor into the start of the given block ID, returning false on success and
/// true on failure.
static bool SkipCursorToBlock(BitstreamCursor &Cursor, unsigned BlockID) {
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::EndBlock:
return true;
case llvm::BitstreamEntry::Record:
// Ignore top-level records.
if (Expected<unsigned> Skipped = Cursor.skipRecord(Entry.ID))
break;
else {
// FIXME this drops errors on the floor.
consumeError(Skipped.takeError());
return true;
}
case llvm::BitstreamEntry::SubBlock:
if (Entry.ID == BlockID) {
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
// Found it!
return false;
}
if (llvm::Error Err = Cursor.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
}
}
}
ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
ModuleKind Type,
SourceLocation ImportLoc,
unsigned ClientLoadCapabilities,
SmallVectorImpl<ImportedSubmodule> *Imported) {
llvm::SaveAndRestore<SourceLocation>
SetCurImportLocRAII(CurrentImportLoc, ImportLoc);
llvm::SaveAndRestore<Optional<ModuleKind>> SetCurModuleKindRAII(
CurrentDeserializingModuleKind, Type);
// Defer any pending actions until we get to the end of reading the AST file.
Deserializing AnASTFile(this);
// Bump the generation number.
unsigned PreviousGeneration = 0;
if (ContextObj)
PreviousGeneration = incrementGeneration(*ContextObj);
unsigned NumModules = ModuleMgr.size();
auto removeModulesAndReturn = [&](ASTReadResult ReadResult) {
assert(ReadResult && "expected to return error");
ModuleMgr.removeModules(ModuleMgr.begin() + NumModules,
PP.getLangOpts().Modules
? &PP.getHeaderSearchInfo().getModuleMap()
: nullptr);
// If we find that any modules are unusable, the global index is going
// to be out-of-date. Just remove it.
GlobalIndex.reset();
ModuleMgr.setGlobalIndex(nullptr);
return ReadResult;
};
SmallVector<ImportedModule, 4> Loaded;
switch (ASTReadResult ReadResult =
ReadASTCore(FileName, Type, ImportLoc,
/*ImportedBy=*/nullptr, Loaded, 0, 0,
ASTFileSignature(), ClientLoadCapabilities)) {
case Failure:
case Missing:
case OutOfDate:
case VersionMismatch:
case ConfigurationMismatch:
case HadErrors:
return removeModulesAndReturn(ReadResult);
case Success:
break;
}
// Here comes stuff that we only do once the entire chain is loaded.
// Load the AST blocks of all of the modules that we loaded. We can still
// hit errors parsing the ASTs at this point.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
// Read the AST block.
if (ASTReadResult Result = ReadASTBlock(F, ClientLoadCapabilities))
return removeModulesAndReturn(Result);
// The AST block should always have a definition for the main module.
if (F.isModule() && !F.DidReadTopLevelSubmodule) {
Error(diag::err_module_file_missing_top_level_submodule, F.FileName);
return removeModulesAndReturn(Failure);
}
// Read the extension blocks.
while (!SkipCursorToBlock(F.Stream, EXTENSION_BLOCK_ID)) {
if (ASTReadResult Result = ReadExtensionBlock(F))
return removeModulesAndReturn(Result);
}
// Once read, set the ModuleFile bit base offset and update the size in
// bits of all files we've seen.
F.GlobalBitOffset = TotalModulesSizeInBits;
TotalModulesSizeInBits += F.SizeInBits;
GlobalBitOffsetsMap.insert(std::make_pair(F.GlobalBitOffset, &F));
}
// Preload source locations and interesting identifiers.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
// Preload SLocEntries.
for (unsigned I = 0, N = F.PreloadSLocEntries.size(); I != N; ++I) {
int Index = int(F.PreloadSLocEntries[I] - 1) + F.SLocEntryBaseID;
// Load it through the SourceManager and don't call ReadSLocEntry()
// directly because the entry may have already been loaded in which case
// calling ReadSLocEntry() directly would trigger an assertion in
// SourceManager.
SourceMgr.getLoadedSLocEntryByID(Index);
}
// Map the original source file ID into the ID space of the current
// compilation.
if (F.OriginalSourceFileID.isValid()) {
F.OriginalSourceFileID = FileID::get(
F.SLocEntryBaseID + F.OriginalSourceFileID.getOpaqueValue() - 1);
}
// Preload all the pending interesting identifiers by marking them out of
// date.
for (auto Offset : F.PreloadIdentifierOffsets) {
const unsigned char *Data = F.IdentifierTableData + Offset;
ASTIdentifierLookupTrait Trait(*this, F);
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
auto &II = PP.getIdentifierTable().getOwn(Key);
II.setOutOfDate(true);
// Mark this identifier as being from an AST file so that we can track
// whether we need to serialize it.
markIdentifierFromAST(*this, II);
// Associate the ID with the identifier so that the writer can reuse it.
auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first);
SetIdentifierInfo(ID, &II);
}
}
// Set up the import locations and notify the module manager that we've
// committed to these module files.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
ModuleMgr.moduleFileAccepted(&F);
// Set the import location.
F.DirectImportLoc = ImportLoc;
// FIXME: We assume that locations from PCH / preamble do not need
// any translation.
if (!M.ImportedBy)
F.ImportLoc = M.ImportLoc;
else
F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
}
if (!PP.getLangOpts().CPlusPlus ||
(Type != MK_ImplicitModule && Type != MK_ExplicitModule &&
Type != MK_PrebuiltModule)) {
// Mark all of the identifiers in the identifier table as being out of date,
// so that various accessors know to check the loaded modules when the
// identifier is used.
//
// For C++ modules, we don't need information on many identifiers (just
// those that provide macros or are poisoned), so we mark all of
// the interesting ones via PreloadIdentifierOffsets.
for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
IdEnd = PP.getIdentifierTable().end();
Id != IdEnd; ++Id)
Id->second->setOutOfDate(true);
}
// Mark selectors as out of date.
for (auto Sel : SelectorGeneration)
SelectorOutOfDate[Sel.first] = true;
// Resolve any unresolved module exports.
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID);
Module *ResolvedMod = getSubmodule(GlobalID);
switch (Unresolved.Kind) {
case UnresolvedModuleRef::Conflict:
if (ResolvedMod) {
Module::Conflict Conflict;
Conflict.Other = ResolvedMod;
Conflict.Message = Unresolved.String.str();
Unresolved.Mod->Conflicts.push_back(Conflict);
}
continue;
case UnresolvedModuleRef::Import:
if (ResolvedMod)
Unresolved.Mod->Imports.insert(ResolvedMod);
continue;
case UnresolvedModuleRef::Export:
if (ResolvedMod || Unresolved.IsWildcard)
Unresolved.Mod->Exports.push_back(
Module::ExportDecl(ResolvedMod, Unresolved.IsWildcard));
continue;
}
}
UnresolvedModuleRefs.clear();
if (Imported)
Imported->append(ImportedModules.begin(),
ImportedModules.end());
// FIXME: How do we load the 'use'd modules? They may not be submodules.
// Might be unnecessary as use declarations are only used to build the
// module itself.
if (ContextObj)
InitializeContext();
if (SemaObj)
UpdateSema();
if (DeserializationListener)
DeserializationListener->ReaderInitialized(this);
ModuleFile &PrimaryModule = ModuleMgr.getPrimaryModule();
if (PrimaryModule.OriginalSourceFileID.isValid()) {
// If this AST file is a precompiled preamble, then set the
// preamble file ID of the source manager to the source file
// from which the preamble was built.
if (Type == MK_Preamble) {
SourceMgr.setPreambleFileID(PrimaryModule.OriginalSourceFileID);
} else if (Type == MK_MainFile) {
SourceMgr.setMainFileID(PrimaryModule.OriginalSourceFileID);
}
}
// For any Objective-C class definitions we have already loaded, make sure
// that we load any additional categories.
if (ContextObj) {
for (unsigned I = 0, N = ObjCClassesLoaded.size(); I != N; ++I) {
loadObjCCategories(ObjCClassesLoaded[I]->getGlobalID(),
ObjCClassesLoaded[I],
PreviousGeneration);
}
}
if (PP.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModulesValidateOncePerBuildSession) {
// Now we are certain that the module and all modules it depends on are
// up to date. Create or update timestamp files for modules that are
// located in the module cache (not for PCH files that could be anywhere
// in the filesystem).
for (unsigned I = 0, N = Loaded.size(); I != N; ++I) {
ImportedModule &M = Loaded[I];
if (M.Mod->Kind == MK_ImplicitModule) {
updateModuleTimestamp(*M.Mod);
}
}
}
return Success;
}
static ASTFileSignature readASTFileSignature(StringRef PCH);
/// Returns an error if \p Stream does not start with the AST/PCH file
/// magic number 'CPCH'.
static llvm::Error doesntStartWithASTFileMagic(BitstreamCursor &Stream) {
// FIXME checking magic headers is done in other places such as
// SerializedDiagnosticReader and GlobalModuleIndex, but error handling isn't
// always done the same. Unify it all with a helper.
if (!Stream.canSkipToPos(4))
return llvm::createStringError(std::errc::illegal_byte_sequence,
"file too small to contain AST file magic");
for (unsigned C : {'C', 'P', 'C', 'H'})
if (Expected<llvm::SimpleBitstreamCursor::word_t> Res = Stream.Read(8)) {
if (Res.get() != C)
return llvm::createStringError(
std::errc::illegal_byte_sequence,
"file doesn't start with AST file magic");
} else
return Res.takeError();
return llvm::Error::success();
}
static unsigned moduleKindForDiagnostic(ModuleKind Kind) {
switch (Kind) {
case MK_PCH:
return 0; // PCH
case MK_ImplicitModule:
case MK_ExplicitModule:
case MK_PrebuiltModule:
return 1; // module
case MK_MainFile:
case MK_Preamble:
return 2; // main source file
}
llvm_unreachable("unknown module kind");
}
ASTReader::ASTReadResult
ASTReader::ReadASTCore(StringRef FileName,
ModuleKind Type,
SourceLocation ImportLoc,
ModuleFile *ImportedBy,
SmallVectorImpl<ImportedModule> &Loaded,
off_t ExpectedSize, time_t ExpectedModTime,
ASTFileSignature ExpectedSignature,
unsigned ClientLoadCapabilities) {
ModuleFile *M;
std::string ErrorStr;
ModuleManager::AddModuleResult AddResult
= ModuleMgr.addModule(FileName, Type, ImportLoc, ImportedBy,
getGeneration(), ExpectedSize, ExpectedModTime,
ExpectedSignature, readASTFileSignature,
M, ErrorStr);
switch (AddResult) {
case ModuleManager::AlreadyLoaded:
Diag(diag::remark_module_import)
<< M->ModuleName << M->FileName << (ImportedBy ? true : false)
<< (ImportedBy ? StringRef(ImportedBy->ModuleName) : StringRef());
return Success;
case ModuleManager::NewlyLoaded:
// Load module file below.
break;
case ModuleManager::Missing:
// The module file was missing; if the client can handle that, return
// it.
if (ClientLoadCapabilities & ARR_Missing)
return Missing;
// Otherwise, return an error.
Diag(diag::err_ast_file_not_found)
<< moduleKindForDiagnostic(Type) << FileName << !ErrorStr.empty()
<< ErrorStr;
return Failure;
case ModuleManager::OutOfDate:
// We couldn't load the module file because it is out-of-date. If the
// client can handle out-of-date, return it.
if (ClientLoadCapabilities & ARR_OutOfDate)
return OutOfDate;
// Otherwise, return an error.
Diag(diag::err_ast_file_out_of_date)
<< moduleKindForDiagnostic(Type) << FileName << !ErrorStr.empty()
<< ErrorStr;
return Failure;
}
assert(M && "Missing module file");
bool ShouldFinalizePCM = false;
auto FinalizeOrDropPCM = llvm::make_scope_exit([&]() {
auto &MC = getModuleManager().getModuleCache();
if (ShouldFinalizePCM)
MC.finalizePCM(FileName);
else
MC.tryToDropPCM(FileName);
});
ModuleFile &F = *M;
BitstreamCursor &Stream = F.Stream;
Stream = BitstreamCursor(PCHContainerRdr.ExtractPCH(*F.Buffer));
F.SizeInBits = F.Buffer->getBufferSize() * 8;
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
Diag(diag::err_ast_file_invalid)
<< moduleKindForDiagnostic(Type) << FileName << std::move(Err);
return Failure;
}
// This is used for compatibility with older PCH formats.
bool HaveReadControlBlock = false;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::Record:
case llvm::BitstreamEntry::EndBlock:
Error("invalid record at top-level of AST file");
return Failure;
case llvm::BitstreamEntry::SubBlock:
break;
}
switch (Entry.ID) {
case CONTROL_BLOCK_ID:
HaveReadControlBlock = true;
switch (ReadControlBlock(F, Loaded, ImportedBy, ClientLoadCapabilities)) {
case Success:
// Check that we didn't try to load a non-module AST file as a module.
//
// FIXME: Should we also perform the converse check? Loading a module as
// a PCH file sort of works, but it's a bit wonky.
if ((Type == MK_ImplicitModule || Type == MK_ExplicitModule ||
Type == MK_PrebuiltModule) &&
F.ModuleName.empty()) {
auto Result = (Type == MK_ImplicitModule) ? OutOfDate : Failure;
if (Result != OutOfDate ||
(ClientLoadCapabilities & ARR_OutOfDate) == 0)
Diag(diag::err_module_file_not_module) << FileName;
return Result;
}
break;
case Failure: return Failure;
case Missing: return Missing;
case OutOfDate: return OutOfDate;
case VersionMismatch: return VersionMismatch;
case ConfigurationMismatch: return ConfigurationMismatch;
case HadErrors: return HadErrors;
}
break;
case AST_BLOCK_ID:
if (!HaveReadControlBlock) {
if ((ClientLoadCapabilities & ARR_VersionMismatch) == 0)
Diag(diag::err_pch_version_too_old);
return VersionMismatch;
}
// Record that we've loaded this module.
Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc));
ShouldFinalizePCM = true;
return Success;
case UNHASHED_CONTROL_BLOCK_ID:
// This block is handled using look-ahead during ReadControlBlock. We
// shouldn't get here!
Error("malformed block record in AST file");
return Failure;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
break;
}
}
llvm_unreachable("unexpected break; expected return");
}
ASTReader::ASTReadResult
ASTReader::readUnhashedControlBlock(ModuleFile &F, bool WasImportedBy,
unsigned ClientLoadCapabilities) {
const HeaderSearchOptions &HSOpts =
PP.getHeaderSearchInfo().getHeaderSearchOpts();
bool AllowCompatibleConfigurationMismatch =
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule;
bool DisableValidation = shouldDisableValidationForFile(F);
ASTReadResult Result = readUnhashedControlBlockImpl(
&F, F.Data, ClientLoadCapabilities, AllowCompatibleConfigurationMismatch,
Listener.get(),
WasImportedBy ? false : HSOpts.ModulesValidateDiagnosticOptions);
// If F was directly imported by another module, it's implicitly validated by
// the importing module.
if (DisableValidation || WasImportedBy ||
(AllowConfigurationMismatch && Result == ConfigurationMismatch))
return Success;
if (Result == Failure) {
Error("malformed block record in AST file");
return Failure;
}
if (Result == OutOfDate && F.Kind == MK_ImplicitModule) {
// If this module has already been finalized in the ModuleCache, we're stuck
// with it; we can only load a single version of each module.
//
// This can happen when a module is imported in two contexts: in one, as a
// user module; in another, as a system module (due to an import from
// another module marked with the [system] flag). It usually indicates a
// bug in the module map: this module should also be marked with [system].
//
// If -Wno-system-headers (the default), and the first import is as a
// system module, then validation will fail during the as-user import,
// since -Werror flags won't have been validated. However, it's reasonable
// to treat this consistently as a system module.
//
// If -Wsystem-headers, the PCM on disk was built with
// -Wno-system-headers, and the first import is as a user module, then
// validation will fail during the as-system import since the PCM on disk
// doesn't guarantee that -Werror was respected. However, the -Werror
// flags were checked during the initial as-user import.
if (getModuleManager().getModuleCache().isPCMFinal(F.FileName)) {
Diag(diag::warn_module_system_bit_conflict) << F.FileName;
return Success;
}
}
return Result;
}
ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
ModuleFile *F, llvm::StringRef StreamData, unsigned ClientLoadCapabilities,
bool AllowCompatibleConfigurationMismatch, ASTReaderListener *Listener,
bool ValidateDiagnosticOptions) {
// Initialize a stream.
BitstreamCursor Stream(StreamData);
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return Failure;
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, UNHASHED_CONTROL_BLOCK_ID))
return Failure;
// Read all of the records in the options block.
RecordData Record;
ASTReadResult Result = Success;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
case llvm::BitstreamEntry::SubBlock:
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Result;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read and process a record.
Record.clear();
Expected<unsigned> MaybeRecordType = Stream.readRecord(Entry.ID, Record);
if (!MaybeRecordType) {
// FIXME this drops the error.
return Failure;
}
switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) {
case SIGNATURE:
if (F)
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
break;
case AST_BLOCK_HASH:
if (F)
F->ASTBlockHash =
ASTFileSignature::create(Record.begin(), Record.end());
break;
case DIAGNOSTIC_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
if (Listener && ValidateDiagnosticOptions &&
!AllowCompatibleConfigurationMismatch &&
ParseDiagnosticOptions(Record, Complain, *Listener))
Result = OutOfDate; // Don't return early. Read the signature.
break;
}
case DIAG_PRAGMA_MAPPINGS:
if (!F)
break;
if (F->PragmaDiagMappings.empty())
F->PragmaDiagMappings.swap(Record);
else
F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(),
Record.begin(), Record.end());
break;
}
}
}
/// Parse a record and blob containing module file extension metadata.
static bool parseModuleFileExtensionMetadata(
const SmallVectorImpl<uint64_t> &Record,
StringRef Blob,
ModuleFileExtensionMetadata &Metadata) {
if (Record.size() < 4) return true;
Metadata.MajorVersion = Record[0];
Metadata.MinorVersion = Record[1];
unsigned BlockNameLen = Record[2];
unsigned UserInfoLen = Record[3];
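// The blob packs the block name immediately followed by the user info
// string; BlockNameLen and UserInfoLen give the split point.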
if (BlockNameLen + UserInfoLen > Blob.size()) return true;
Metadata.BlockName = std::string(Blob.data(), Blob.data() + BlockNameLen);
Metadata.UserInfo = std::string(Blob.data() + BlockNameLen,
Blob.data() + BlockNameLen + UserInfoLen);
return false;
}
ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
BitstreamCursor &Stream = F.Stream;
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock:
if (llvm::Error Err = Stream.SkipBlock()) {
Error(std::move(Err));
return Failure;
}
continue;
case llvm::BitstreamEntry::EndBlock:
return Success;
case llvm::BitstreamEntry::Error:
return HadErrors;
case llvm::BitstreamEntry::Record:
break;
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return Failure;
}
switch (MaybeRecCode.get()) {
case EXTENSION_METADATA: {
ModuleFileExtensionMetadata Metadata;
if (parseModuleFileExtensionMetadata(Record, Blob, Metadata)) {
Error("malformed EXTENSION_METADATA in AST file");
return Failure;
}
// Find a module file extension with this block name.
auto Known = ModuleFileExtensions.find(Metadata.BlockName);
if (Known == ModuleFileExtensions.end()) break;
// Form a reader.
if (auto Reader = Known->second->createExtensionReader(Metadata, *this,
F, Stream)) {
F.ExtensionReaders.push_back(std::move(Reader));
}
break;
}
}
}
return Success;
}
void ASTReader::InitializeContext() {
assert(ContextObj && "no context to initialize");
ASTContext &Context = *ContextObj;
// If there's a listener, notify them that we "read" the translation unit.
if (DeserializationListener)
DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID,
Context.getTranslationUnitDecl());
// FIXME: Find a better way to deal with collisions between these
// built-in types. Right now, we just ignore the problem.
// Load the special types.
if (SpecialTypes.size() >= NumSpecialTypeIDs) {
if (unsigned String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) {
if (!Context.CFConstantStringTypeDecl)
Context.setCFConstantStringType(GetType(String));
}
if (unsigned File = SpecialTypes[SPECIAL_TYPE_FILE]) {
QualType FileType = GetType(File);
if (FileType.isNull()) {
Error("FILE type is NULL");
return;
}
if (!Context.FILEDecl) {
if (const TypedefType *Typedef = FileType->getAs<TypedefType>())
Context.setFILEDecl(Typedef->getDecl());
else {
const TagType *Tag = FileType->getAs<TagType>();
if (!Tag) {
Error("Invalid FILE type in AST file");
return;
}
Context.setFILEDecl(Tag->getDecl());
}
}
}
if (unsigned Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) {
QualType Jmp_bufType = GetType(Jmp_buf);
if (Jmp_bufType.isNull()) {
Error("jmp_buf type is NULL");
return;
}
if (!Context.jmp_bufDecl) {
if (const TypedefType *Typedef = Jmp_bufType->getAs<TypedefType>())
Context.setjmp_bufDecl(Typedef->getDecl());
else {
const TagType *Tag = Jmp_bufType->getAs<TagType>();
if (!Tag) {
Error("Invalid jmp_buf type in AST file");
return;
}
Context.setjmp_bufDecl(Tag->getDecl());
}
}
}
if (unsigned Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) {
QualType Sigjmp_bufType = GetType(Sigjmp_buf);
if (Sigjmp_bufType.isNull()) {
Error("sigjmp_buf type is NULL");
return;
}
if (!Context.sigjmp_bufDecl) {
if (const TypedefType *Typedef = Sigjmp_bufType->getAs<TypedefType>())
Context.setsigjmp_bufDecl(Typedef->getDecl());
else {
const TagType *Tag = Sigjmp_bufType->getAs<TagType>();
assert(Tag && "Invalid sigjmp_buf type in AST file");
Context.setsigjmp_bufDecl(Tag->getDecl());
}
}
}
if (unsigned ObjCIdRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) {
if (Context.ObjCIdRedefinitionType.isNull())
Context.ObjCIdRedefinitionType = GetType(ObjCIdRedef);
}
if (unsigned ObjCClassRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) {
if (Context.ObjCClassRedefinitionType.isNull())
Context.ObjCClassRedefinitionType = GetType(ObjCClassRedef);
}
if (unsigned ObjCSelRedef
= SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) {
if (Context.ObjCSelRedefinitionType.isNull())
Context.ObjCSelRedefinitionType = GetType(ObjCSelRedef);
}
if (unsigned Ucontext_t = SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) {
QualType Ucontext_tType = GetType(Ucontext_t);
if (Ucontext_tType.isNull()) {
Error("ucontext_t type is NULL");
return;
}
if (!Context.ucontext_tDecl) {
if (const TypedefType *Typedef = Ucontext_tType->getAs<TypedefType>())
Context.setucontext_tDecl(Typedef->getDecl());
else {
const TagType *Tag = Ucontext_tType->getAs<TagType>();
assert(Tag && "Invalid ucontext_t type in AST file");
Context.setucontext_tDecl(Tag->getDecl());
}
}
}
}
ReadPragmaDiagnosticMappings(Context.getDiagnostics());
// If there were any CUDA special declarations, deserialize them.
if (!CUDASpecialDeclRefs.empty()) {
assert(CUDASpecialDeclRefs.size() == 1 && "More decl refs than expected!");
Context.setcudaConfigureCallDecl(
cast<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[0])));
}
// Re-export any modules that were imported by a non-module AST file.
// FIXME: This does not make macro-only imports visible again.
for (auto &Import : ImportedModules) {
if (Module *Imported = getSubmodule(Import.ID)) {
makeModuleVisible(Imported, Module::AllVisible,
/*ImportLoc=*/Import.ImportLoc);
if (Import.ImportLoc.isValid())
PP.makeModuleVisible(Imported, Import.ImportLoc);
// This updates visibility for Preprocessor only. For Sema, which can be
// nullptr here, we do the same later, in UpdateSema().
}
}
}
void ASTReader::finalizeForWriting() {
// Nothing to do for now.
}
/// Reads and returns the signature record from \p PCH's control block, or
/// else returns an empty signature.
static ASTFileSignature readASTFileSignature(StringRef PCH) {
BitstreamCursor Stream(PCH);
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return ASTFileSignature();
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, UNHASHED_CONTROL_BLOCK_ID))
return ASTFileSignature();
// Scan for SIGNATURE inside the diagnostic options block.
ASTReader::RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return ASTFileSignature();
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return ASTFileSignature();
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecord) {
// FIXME this drops the error on the floor.
consumeError(MaybeRecord.takeError());
return ASTFileSignature();
}
if (SIGNATURE == MaybeRecord.get())
return ASTFileSignature::create(Record.begin(),
Record.begin() + ASTFileSignature::size);
}
}
/// Retrieve the name of the original source file directly from the AST
/// file, without actually loading the AST file.
std::string ASTReader::getOriginalSourceFile(
const std::string &ASTFileName, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr, DiagnosticsEngine &Diags) {
// Open the AST file.
auto Buffer = FileMgr.getBufferForFile(ASTFileName);
if (!Buffer) {
Diags.Report(diag::err_fe_unable_to_read_pch_file)
<< ASTFileName << Buffer.getError().message();
return std::string();
}
// Initialize the stream
BitstreamCursor Stream(PCHContainerRdr.ExtractPCH(**Buffer));
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
Diags.Report(diag::err_fe_not_a_pch_file) << ASTFileName << std::move(Err);
return std::string();
}
// Scan for the CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID)) {
Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName;
return std::string();
}
// Scan for ORIGINAL_FILE inside the control block.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
// FIXME this drops errors on the floor.
consumeError(MaybeEntry.takeError());
return std::string();
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind == llvm::BitstreamEntry::EndBlock)
return std::string();
if (Entry.Kind != llvm::BitstreamEntry::Record) {
Diags.Report(diag::err_fe_pch_malformed_block) << ASTFileName;
return std::string();
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecord) {
// FIXME this drops the errors on the floor.
consumeError(MaybeRecord.takeError());
return std::string();
}
if (ORIGINAL_FILE == MaybeRecord.get())
return Blob.str();
}
}
namespace {
class SimplePCHValidator : public ASTReaderListener {
const LangOptions &ExistingLangOpts;
const TargetOptions &ExistingTargetOpts;
const PreprocessorOptions &ExistingPPOpts;
std::string ExistingModuleCachePath;
FileManager &FileMgr;
public:
SimplePCHValidator(const LangOptions &ExistingLangOpts,
const TargetOptions &ExistingTargetOpts,
const PreprocessorOptions &ExistingPPOpts,
StringRef ExistingModuleCachePath, FileManager &FileMgr)
: ExistingLangOpts(ExistingLangOpts),
ExistingTargetOpts(ExistingTargetOpts),
ExistingPPOpts(ExistingPPOpts),
ExistingModuleCachePath(ExistingModuleCachePath), FileMgr(FileMgr) {}
bool ReadLanguageOptions(const LangOptions &LangOpts, bool Complain,
bool AllowCompatibleDifferences) override {
return checkLanguageOptions(ExistingLangOpts, LangOpts, nullptr,
AllowCompatibleDifferences);
}
bool ReadTargetOptions(const TargetOptions &TargetOpts, bool Complain,
bool AllowCompatibleDifferences) override {
return checkTargetOptions(ExistingTargetOpts, TargetOpts, nullptr,
AllowCompatibleDifferences);
}
bool ReadHeaderSearchOptions(const HeaderSearchOptions &HSOpts,
StringRef SpecificModuleCachePath,
bool Complain) override {
return checkHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
ExistingModuleCachePath, nullptr,
ExistingLangOpts, ExistingPPOpts);
}
bool ReadPreprocessorOptions(const PreprocessorOptions &PPOpts,
bool Complain,
std::string &SuggestedPredefines) override {
return checkPreprocessorOptions(ExistingPPOpts, PPOpts, nullptr, FileMgr,
SuggestedPredefines, ExistingLangOpts);
}
};
} // namespace
bool ASTReader::readASTFileControlBlock(
StringRef Filename, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr,
bool FindModuleFileExtensions,
ASTReaderListener &Listener, bool ValidateDiagnosticOptions) {
// Open the AST file.
// FIXME: This allows use of the VFS; we do not allow use of the
// VFS when actually loading a module.
auto Buffer = FileMgr.getBufferForFile(Filename);
if (!Buffer) {
return true;
}
// Initialize the stream
StringRef Bytes = PCHContainerRdr.ExtractPCH(**Buffer);
BitstreamCursor Stream(Bytes);
// Sniff for the signature.
if (llvm::Error Err = doesntStartWithASTFileMagic(Stream)) {
consumeError(std::move(Err)); // FIXME this drops errors on the floor.
return true;
}
// Scan for the CONTROL_BLOCK_ID block.
if (SkipCursorToBlock(Stream, CONTROL_BLOCK_ID))
return true;
bool NeedsInputFiles = Listener.needsInputFileVisitation();
bool NeedsSystemInputFiles = Listener.needsSystemInputFileVisitation();
bool NeedsImports = Listener.needsImportVisitation();
BitstreamCursor InputFilesCursor;
RecordData Record;
std::string ModuleDir;
bool DoneWithControlBlock = false;
while (!DoneWithControlBlock) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error on the floor.
consumeError(MaybeEntry.takeError());
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: {
switch (Entry.ID) {
case OPTIONS_BLOCK_ID: {
std::string IgnoredSuggestedPredefines;
if (ReadOptionsBlock(Stream, ARR_ConfigurationMismatch | ARR_OutOfDate,
/*AllowCompatibleConfigurationMismatch*/ false,
Listener, IgnoredSuggestedPredefines) != Success)
return true;
break;
}
case INPUT_FILES_BLOCK_ID:
InputFilesCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
if (NeedsInputFiles &&
ReadBlockAbbrevs(InputFilesCursor, INPUT_FILES_BLOCK_ID))
return true;
break;
default:
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
break;
}
continue;
}
case llvm::BitstreamEntry::EndBlock:
DoneWithControlBlock = true;
break;
case llvm::BitstreamEntry::Error:
return true;
case llvm::BitstreamEntry::Record:
break;
}
if (DoneWithControlBlock) break;
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
// FIXME this drops the error.
return true;
}
switch ((ControlRecordTypes)MaybeRecCode.get()) {
case METADATA:
if (Record[0] != VERSION_MAJOR)
return true;
if (Listener.ReadFullVersionInformation(Blob))
return true;
break;
case MODULE_NAME:
Listener.ReadModuleName(Blob);
break;
case MODULE_DIRECTORY:
ModuleDir = std::string(Blob);
break;
case MODULE_MAP_FILE: {
unsigned Idx = 0;
auto Path = ReadString(Record, Idx);
ResolveImportedPath(Path, ModuleDir);
Listener.ReadModuleMapFile(Path);
break;
}
case INPUT_FILE_OFFSETS: {
if (!NeedsInputFiles)
break;
unsigned NumInputFiles = Record[0];
unsigned NumUserFiles = Record[1];
const llvm::support::unaligned_uint64_t *InputFileOffs =
(const llvm::support::unaligned_uint64_t *)Blob.data();
for (unsigned I = 0; I != NumInputFiles; ++I) {
// Go find this input file.
bool isSystemFile = I >= NumUserFiles;
if (isSystemFile && !NeedsSystemInputFiles)
break; // the rest are system input files
BitstreamCursor &Cursor = InputFilesCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(InputFileOffs[I])) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
}
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
// FIXME this drops errors on the floor.
consumeError(MaybeCode.takeError());
}
unsigned Code = MaybeCode.get();
RecordData Record;
StringRef Blob;
bool shouldContinue = false;
Expected<unsigned> MaybeRecordType =
Cursor.readRecord(Code, Record, &Blob);
if (!MaybeRecordType) {
// FIXME this drops errors on the floor.
consumeError(MaybeRecordType.takeError());
}
switch ((InputFileRecordTypes)MaybeRecordType.get()) {
case INPUT_FILE_HASH:
break;
case INPUT_FILE:
bool Overridden = static_cast<bool>(Record[3]);
std::string Filename = std::string(Blob);
ResolveImportedPath(Filename, ModuleDir);
shouldContinue = Listener.visitInputFile(
Filename, isSystemFile, Overridden, /*IsExplicitModule*/false);
break;
}
if (!shouldContinue)
break;
}
break;
}
case IMPORTS: {
if (!NeedsImports)
break;
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Read information about the AST file.
Idx +=
1 + 1 + 1 + 1 +
ASTFileSignature::size; // Kind, ImportLoc, Size, ModTime, Signature
std::string ModuleName = ReadString(Record, Idx);
std::string Filename = ReadString(Record, Idx);
ResolveImportedPath(Filename, ModuleDir);
Listener.visitImport(ModuleName, Filename);
}
break;
}
default:
// No other validation to perform.
break;
}
}
// Look for module file extension blocks, if requested.
if (FindModuleFileExtensions) {
BitstreamCursor SavedStream = Stream;
while (!SkipCursorToBlock(Stream, EXTENSION_BLOCK_ID)) {
bool DoneWithExtensionBlock = false;
while (!DoneWithExtensionBlock) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
// FIXME this drops the error.
return true;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock:
if (llvm::Error Err = Stream.SkipBlock()) {
// FIXME this drops the error on the floor.
consumeError(std::move(Err));
return true;
}
continue;
case llvm::BitstreamEntry::EndBlock:
DoneWithExtensionBlock = true;
continue;
case llvm::BitstreamEntry::Error:
return true;
case llvm::BitstreamEntry::Record:
break;
}
Record.clear();
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecCode) {
// FIXME this drops the error.
return true;
}
switch (MaybeRecCode.get()) {
case EXTENSION_METADATA: {
ModuleFileExtensionMetadata Metadata;
if (parseModuleFileExtensionMetadata(Record, Blob, Metadata))
return true;
Listener.readModuleFileExtension(Metadata);
break;
}
}
}
}
Stream = SavedStream;
}
// Scan for the UNHASHED_CONTROL_BLOCK_ID block.
if (readUnhashedControlBlockImpl(
nullptr, Bytes, ARR_ConfigurationMismatch | ARR_OutOfDate,
/*AllowCompatibleConfigurationMismatch*/ false, &Listener,
ValidateDiagnosticOptions) != Success)
return true;
return false;
}
bool ASTReader::isAcceptableASTFile(StringRef Filename, FileManager &FileMgr,
const PCHContainerReader &PCHContainerRdr,
const LangOptions &LangOpts,
const TargetOptions &TargetOpts,
const PreprocessorOptions &PPOpts,
StringRef ExistingModuleCachePath) {
SimplePCHValidator validator(LangOpts, TargetOpts, PPOpts,
ExistingModuleCachePath, FileMgr);
return !readASTFileControlBlock(Filename, FileMgr, PCHContainerRdr,
/*FindModuleFileExtensions=*/false,
validator,
/*ValidateDiagnosticOptions=*/true);
}
ASTReader::ASTReadResult
ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
// Enter the submodule block.
if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) {
Error(std::move(Err));
return Failure;
}
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
bool First = true;
Module *CurrentModule = nullptr;
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
F.Stream.advanceSkippingSubblocks();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return Failure;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return Failure;
case llvm::BitstreamEntry::EndBlock:
return Success;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
StringRef Blob;
Record.clear();
Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeKind) {
Error(MaybeKind.takeError());
return Failure;
}
unsigned Kind = MaybeKind.get();
if ((Kind == SUBMODULE_METADATA) != First) {
Error("submodule metadata record should be at beginning of block");
return Failure;
}
First = false;
// Submodule information is only valid if we have a current module.
// FIXME: Should we error on these cases?
if (!CurrentModule && Kind != SUBMODULE_METADATA &&
Kind != SUBMODULE_DEFINITION)
continue;
switch (Kind) {
default: // Default behavior: ignore.
break;
case SUBMODULE_DEFINITION: {
if (Record.size() < 12) {
Error("malformed module definition");
return Failure;
}
StringRef Name = Blob;
unsigned Idx = 0;
SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
bool IsFramework = Record[Idx++];
bool IsExplicit = Record[Idx++];
bool IsSystem = Record[Idx++];
bool IsExternC = Record[Idx++];
bool InferSubmodules = Record[Idx++];
bool InferExplicitSubmodules = Record[Idx++];
bool InferExportWildcard = Record[Idx++];
bool ConfigMacrosExhaustive = Record[Idx++];
bool ModuleMapIsPrivate = Record[Idx++];
Module *ParentModule = nullptr;
if (Parent)
ParentModule = getSubmodule(Parent);
// Retrieve this (sub)module from the module map, creating it if
// necessary.
CurrentModule =
ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
.first;
// FIXME: set the definition loc for CurrentModule, or call
// ModMap.setInferredModuleAllowedBy()
SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
if (GlobalIndex >= SubmodulesLoaded.size() ||
SubmodulesLoaded[GlobalIndex]) {
Error("too many submodules");
return Failure;
}
if (!ParentModule) {
if (const FileEntry *CurFile = CurrentModule->getASTFile()) {
// Don't emit module relocation error if we have -fno-validate-pch
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
CurFile != F.File) {
Error(diag::err_module_file_conflict,
CurrentModule->getTopLevelModuleName(), CurFile->getName(),
F.File->getName());
return Failure;
}
}
F.DidReadTopLevelSubmodule = true;
CurrentModule->setASTFile(F.File);
CurrentModule->PresumedModuleMapFile = F.ModuleMapPath;
}
CurrentModule->Kind = Kind;
CurrentModule->Signature = F.Signature;
CurrentModule->IsFromModuleFile = true;
CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem;
CurrentModule->IsExternC = IsExternC;
CurrentModule->InferSubmodules = InferSubmodules;
CurrentModule->InferExplicitSubmodules = InferExplicitSubmodules;
CurrentModule->InferExportWildcard = InferExportWildcard;
CurrentModule->ConfigMacrosExhaustive = ConfigMacrosExhaustive;
CurrentModule->ModuleMapIsPrivate = ModuleMapIsPrivate;
if (DeserializationListener)
DeserializationListener->ModuleRead(GlobalID, CurrentModule);
SubmodulesLoaded[GlobalIndex] = CurrentModule;
// Clear out data that will be replaced by what is in the module file.
CurrentModule->LinkLibraries.clear();
CurrentModule->ConfigMacros.clear();
CurrentModule->UnresolvedConflicts.clear();
CurrentModule->Conflicts.clear();
// The module is available unless it's missing a requirement; relevant
// requirements will be (re-)added by SUBMODULE_REQUIRES records.
// Missing headers that were present when the module was built do not
// make it unavailable -- if we got this far, this must be an explicitly
// imported module file.
CurrentModule->Requirements.clear();
CurrentModule->MissingHeaders.clear();
CurrentModule->IsUnimportable =
ParentModule && ParentModule->IsUnimportable;
CurrentModule->IsAvailable = !CurrentModule->IsUnimportable;
break;
}
case SUBMODULE_UMBRELLA_HEADER: {
std::string Filename = std::string(Blob);
ResolveImportedPath(F, Filename);
if (auto Umbrella = PP.getFileManager().getFile(Filename)) {
if (!CurrentModule->getUmbrellaHeader())
// FIXME: NameAsWritten
ModMap.setUmbrellaHeader(CurrentModule, *Umbrella, Blob, "");
else if (CurrentModule->getUmbrellaHeader().Entry != *Umbrella) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Error("mismatched umbrella headers in submodule");
return OutOfDate;
}
}
break;
}
case SUBMODULE_HEADER:
case SUBMODULE_EXCLUDED_HEADER:
case SUBMODULE_PRIVATE_HEADER:
// We lazily associate headers with their modules via the HeaderInfo table.
// FIXME: Re-evaluate this section; maybe only store InputFile IDs instead
// of complete filenames or remove it entirely.
break;
case SUBMODULE_TEXTUAL_HEADER:
case SUBMODULE_PRIVATE_TEXTUAL_HEADER:
// FIXME: Textual headers are not marked in the HeaderInfo table. Load
// them here.
break;
case SUBMODULE_TOPHEADER:
CurrentModule->addTopHeaderFilename(Blob);
break;
case SUBMODULE_UMBRELLA_DIR: {
std::string Dirname = std::string(Blob);
ResolveImportedPath(F, Dirname);
if (auto Umbrella = PP.getFileManager().getDirectory(Dirname)) {
if (!CurrentModule->getUmbrellaDir())
// FIXME: NameAsWritten
ModMap.setUmbrellaDir(CurrentModule, *Umbrella, Blob, "");
else if (CurrentModule->getUmbrellaDir().Entry != *Umbrella) {
if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
Error("mismatched umbrella directories in submodule");
return OutOfDate;
}
}
break;
}
case SUBMODULE_METADATA: {
F.BaseSubmoduleID = getTotalNumSubmodules();
F.LocalNumSubmodules = Record[0];
unsigned LocalBaseSubmoduleID = Record[1];
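// Record the offset between this file's local submodule ID space and the
// global ID space so local IDs can be remapped on the fly.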
if (F.LocalNumSubmodules > 0) {
// Introduce the global -> local mapping for submodules within this
// module.
GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F));
// Introduce the local -> global mapping for submodules within this
// module.
F.SubmoduleRemap.insertOrReplace(
std::make_pair(LocalBaseSubmoduleID,
F.BaseSubmoduleID - LocalBaseSubmoduleID));
SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
}
break;
}
case SUBMODULE_IMPORTS:
for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[Idx];
Unresolved.Kind = UnresolvedModuleRef::Import;
Unresolved.IsWildcard = false;
UnresolvedModuleRefs.push_back(Unresolved);
}
break;
case SUBMODULE_EXPORTS:
for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[Idx];
Unresolved.Kind = UnresolvedModuleRef::Export;
Unresolved.IsWildcard = Record[Idx + 1];
UnresolvedModuleRefs.push_back(Unresolved);
}
// Once we've loaded the set of exports, there's no reason to keep
// the parsed, unresolved exports around.
CurrentModule->UnresolvedExports.clear();
break;
case SUBMODULE_REQUIRES:
CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(),
PP.getTargetInfo());
break;
case SUBMODULE_LINK_LIBRARY:
ModMap.resolveLinkAsDependencies(CurrentModule);
CurrentModule->LinkLibraries.push_back(
Module::LinkLibrary(std::string(Blob), Record[0]));
break;
case SUBMODULE_CONFIG_MACRO:
CurrentModule->ConfigMacros.push_back(Blob.str());
break;
case SUBMODULE_CONFLICT: {
UnresolvedModuleRef Unresolved;
Unresolved.File = &F;
Unresolved.Mod = CurrentModule;
Unresolved.ID = Record[0];
Unresolved.Kind = UnresolvedModuleRef::Conflict;
Unresolved.IsWildcard = false;
Unresolved.String = Blob;
UnresolvedModuleRefs.push_back(Unresolved);
break;
}
case SUBMODULE_INITIALIZERS: {
if (!ContextObj)
break;
SmallVector<uint32_t, 16> Inits;
for (auto &ID : Record)
Inits.push_back(getGlobalDeclID(F, ID));
ContextObj->addLazyModuleInitializers(CurrentModule, Inits);
break;
}
case SUBMODULE_EXPORT_AS:
CurrentModule->ExportAsModule = Blob.str();
ModMap.addLinkAsDependency(CurrentModule);
break;
}
}
}
/// Parse the record that corresponds to a LangOptions data
/// structure.
///
/// This routine parses the language options from the AST file and then gives
/// them to the AST listener if one is set.
///
/// \returns true if the listener deems the file unacceptable, false otherwise.
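///
/// The record is consumed in a fixed order: one value per LANGOPT and
/// ENUM_LANGOPT entry, the sanitizer flags, the module features, the
/// Objective-C runtime kind and version, the current module name, the
/// comment options, and the OpenMP offloading options.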
bool ASTReader::ParseLanguageOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener,
bool AllowCompatibleDifferences) {
LangOptions LangOpts;
unsigned Idx = 0;
#define LANGOPT(Name, Bits, Default, Description) \
LangOpts.Name = Record[Idx++];
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
LangOpts.set##Name(static_cast<LangOptions::Type>(Record[Idx++]));
#include "clang/Basic/LangOptions.def"
#define SANITIZER(NAME, ID) \
LangOpts.Sanitize.set(SanitizerKind::ID, Record[Idx++]);
#include "clang/Basic/Sanitizers.def"
for (unsigned N = Record[Idx++]; N; --N)
LangOpts.ModuleFeatures.push_back(ReadString(Record, Idx));
ObjCRuntime::Kind runtimeKind = (ObjCRuntime::Kind) Record[Idx++];
VersionTuple runtimeVersion = ReadVersionTuple(Record, Idx);
LangOpts.ObjCRuntime = ObjCRuntime(runtimeKind, runtimeVersion);
LangOpts.CurrentModule = ReadString(Record, Idx);
// Comment options.
for (unsigned N = Record[Idx++]; N; --N) {
LangOpts.CommentOpts.BlockCommandNames.push_back(
ReadString(Record, Idx));
}
LangOpts.CommentOpts.ParseAllComments = Record[Idx++];
// OpenMP offloading options.
for (unsigned N = Record[Idx++]; N; --N) {
LangOpts.OMPTargetTriples.push_back(llvm::Triple(ReadString(Record, Idx)));
}
LangOpts.OMPHostIRFile = ReadString(Record, Idx);
return Listener.ReadLanguageOptions(LangOpts, Complain,
AllowCompatibleDifferences);
}
bool ASTReader::ParseTargetOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener,
bool AllowCompatibleDifferences) {
unsigned Idx = 0;
TargetOptions TargetOpts;
TargetOpts.Triple = ReadString(Record, Idx);
TargetOpts.CPU = ReadString(Record, Idx);
TargetOpts.TuneCPU = ReadString(Record, Idx);
TargetOpts.ABI = ReadString(Record, Idx);
for (unsigned N = Record[Idx++]; N; --N) {
TargetOpts.FeaturesAsWritten.push_back(ReadString(Record, Idx));
}
for (unsigned N = Record[Idx++]; N; --N) {
TargetOpts.Features.push_back(ReadString(Record, Idx));
}
return Listener.ReadTargetOptions(TargetOpts, Complain,
AllowCompatibleDifferences);
}
bool ASTReader::ParseDiagnosticOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener) {
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions);
unsigned Idx = 0;
#define DIAGOPT(Name, Bits, Default) DiagOpts->Name = Record[Idx++];
#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
DiagOpts->set##Name(static_cast<Type>(Record[Idx++]));
#include "clang/Basic/DiagnosticOptions.def"
for (unsigned N = Record[Idx++]; N; --N)
DiagOpts->Warnings.push_back(ReadString(Record, Idx));
for (unsigned N = Record[Idx++]; N; --N)
DiagOpts->Remarks.push_back(ReadString(Record, Idx));
return Listener.ReadDiagnosticOptions(DiagOpts, Complain);
}
bool ASTReader::ParseFileSystemOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener) {
FileSystemOptions FSOpts;
unsigned Idx = 0;
FSOpts.WorkingDir = ReadString(Record, Idx);
return Listener.ReadFileSystemOptions(FSOpts, Complain);
}
bool ASTReader::ParseHeaderSearchOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener) {
HeaderSearchOptions HSOpts;
unsigned Idx = 0;
HSOpts.Sysroot = ReadString(Record, Idx);
// Include entries.
for (unsigned N = Record[Idx++]; N; --N) {
std::string Path = ReadString(Record, Idx);
frontend::IncludeDirGroup Group
= static_cast<frontend::IncludeDirGroup>(Record[Idx++]);
bool IsFramework = Record[Idx++];
bool IgnoreSysRoot = Record[Idx++];
HSOpts.UserEntries.emplace_back(std::move(Path), Group, IsFramework,
IgnoreSysRoot);
}
// System header prefixes.
for (unsigned N = Record[Idx++]; N; --N) {
std::string Prefix = ReadString(Record, Idx);
bool IsSystemHeader = Record[Idx++];
HSOpts.SystemHeaderPrefixes.emplace_back(std::move(Prefix), IsSystemHeader);
}
HSOpts.ResourceDir = ReadString(Record, Idx);
HSOpts.ModuleCachePath = ReadString(Record, Idx);
HSOpts.ModuleUserBuildPath = ReadString(Record, Idx);
HSOpts.DisableModuleHash = Record[Idx++];
HSOpts.ImplicitModuleMaps = Record[Idx++];
HSOpts.ModuleMapFileHomeIsCwd = Record[Idx++];
HSOpts.EnablePrebuiltImplicitModules = Record[Idx++];
HSOpts.UseBuiltinIncludes = Record[Idx++];
HSOpts.UseStandardSystemIncludes = Record[Idx++];
HSOpts.UseStandardCXXIncludes = Record[Idx++];
HSOpts.UseLibcxx = Record[Idx++];
std::string SpecificModuleCachePath = ReadString(Record, Idx);
return Listener.ReadHeaderSearchOptions(HSOpts, SpecificModuleCachePath,
Complain);
}
bool ASTReader::ParsePreprocessorOptions(const RecordData &Record,
bool Complain,
ASTReaderListener &Listener,
std::string &SuggestedPredefines) {
PreprocessorOptions PPOpts;
unsigned Idx = 0;
// Macro definitions/undefs
for (unsigned N = Record[Idx++]; N; --N) {
std::string Macro = ReadString(Record, Idx);
bool IsUndef = Record[Idx++];
PPOpts.Macros.push_back(std::make_pair(Macro, IsUndef));
}
// Includes
for (unsigned N = Record[Idx++]; N; --N) {
PPOpts.Includes.push_back(ReadString(Record, Idx));
}
// Macro Includes
for (unsigned N = Record[Idx++]; N; --N) {
PPOpts.MacroIncludes.push_back(ReadString(Record, Idx));
}
PPOpts.UsePredefines = Record[Idx++];
PPOpts.DetailedRecord = Record[Idx++];
PPOpts.ImplicitPCHInclude = ReadString(Record, Idx);
PPOpts.ObjCXXARCStandardLibrary =
static_cast<ObjCXXARCStandardLibraryKind>(Record[Idx++]);
SuggestedPredefines.clear();
return Listener.ReadPreprocessorOptions(PPOpts, Complain,
SuggestedPredefines);
}
std::pair<ModuleFile *, unsigned>
ASTReader::getModulePreprocessedEntity(unsigned GlobalIndex) {
GlobalPreprocessedEntityMapType::iterator
I = GlobalPreprocessedEntityMap.find(GlobalIndex);
assert(I != GlobalPreprocessedEntityMap.end() &&
"Corrupted global preprocessed entity map");
ModuleFile *M = I->second;
unsigned LocalIndex = GlobalIndex - M->BasePreprocessedEntityID;
return std::make_pair(M, LocalIndex);
}
llvm::iterator_range<PreprocessingRecord::iterator>
ASTReader::getModulePreprocessedEntities(ModuleFile &Mod) const {
if (PreprocessingRecord *PPRec = PP.getPreprocessingRecord())
return PPRec->getIteratorsForLoadedRange(Mod.BasePreprocessedEntityID,
Mod.NumPreprocessedEntities);
return llvm::make_range(PreprocessingRecord::iterator(),
PreprocessingRecord::iterator());
}
bool ASTReader::canRecoverFromOutOfDate(StringRef ModuleFileName,
unsigned int ClientLoadCapabilities) {
return ClientLoadCapabilities & ARR_OutOfDate &&
!getModuleManager().getModuleCache().isPCMFinal(ModuleFileName);
}
llvm::iterator_range<ASTReader::ModuleDeclIterator>
ASTReader::getModuleFileLevelDecls(ModuleFile &Mod) {
return llvm::make_range(
ModuleDeclIterator(this, &Mod, Mod.FileSortedDecls),
ModuleDeclIterator(this, &Mod,
Mod.FileSortedDecls + Mod.NumFileSortedDecls));
}
SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
auto I = GlobalSkippedRangeMap.find(GlobalIndex);
assert(I != GlobalSkippedRangeMap.end() &&
"Corrupted global skipped range map");
ModuleFile *M = I->second;
unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
assert(LocalIndex < M->NumPreprocessedSkippedRanges);
PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()),
TranslateSourceLocation(*M, RawRange.getEnd()));
assert(Range.isValid());
return Range;
}
PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
PreprocessedEntityID PPID = Index+1;
std::pair<ModuleFile *, unsigned> PPInfo = getModulePreprocessedEntity(Index);
ModuleFile &M = *PPInfo.first;
unsigned LocalIndex = PPInfo.second;
const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
if (!PP.getPreprocessingRecord()) {
Error("no preprocessing record");
return nullptr;
}
SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor);
if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit(
M.MacroOffsetsBase + PPOffs.BitOffset)) {
Error(std::move(Err));
return nullptr;
}
Expected<llvm::BitstreamEntry> MaybeEntry =
M.PreprocessorDetailCursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return nullptr;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
if (Entry.Kind != llvm::BitstreamEntry::Record)
return nullptr;
// Read the record.
SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()),
TranslateSourceLocation(M, PPOffs.getEnd()));
PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
StringRef Blob;
RecordData Record;
Expected<unsigned> MaybeRecType =
M.PreprocessorDetailCursor.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecType) {
Error(MaybeRecType.takeError());
return nullptr;
}
switch ((PreprocessorDetailRecordTypes)MaybeRecType.get()) {
case PPD_MACRO_EXPANSION: {
bool isBuiltin = Record[0];
IdentifierInfo *Name = nullptr;
MacroDefinitionRecord *Def = nullptr;
if (isBuiltin)
Name = getLocalIdentifier(M, Record[1]);
else {
PreprocessedEntityID GlobalID =
getGlobalPreprocessedEntityID(M, Record[1]);
Def = cast<MacroDefinitionRecord>(
PPRec.getLoadedPreprocessedEntity(GlobalID - 1));
}
MacroExpansion *ME;
if (isBuiltin)
ME = new (PPRec) MacroExpansion(Name, Range);
else
ME = new (PPRec) MacroExpansion(Def, Range);
return ME;
}
case PPD_MACRO_DEFINITION: {
// Decode the identifier info and then check again; if the macro is
// still defined and associated with the identifier, record its definition.
IdentifierInfo *II = getLocalIdentifier(M, Record[0]);
MacroDefinitionRecord *MD = new (PPRec) MacroDefinitionRecord(II, Range);
if (DeserializationListener)
DeserializationListener->MacroDefinitionRead(PPID, MD);
return MD;
}
case PPD_INCLUSION_DIRECTIVE: {
const char *FullFileNameStart = Blob.data() + Record[0];
StringRef FullFileName(FullFileNameStart, Blob.size() - Record[0]);
const FileEntry *File = nullptr;
if (!FullFileName.empty())
if (auto FE = PP.getFileManager().getFile(FullFileName))
File = *FE;
// FIXME: Stable encoding
InclusionDirective::InclusionKind Kind
= static_cast<InclusionDirective::InclusionKind>(Record[2]);
InclusionDirective *ID
= new (PPRec) InclusionDirective(PPRec, Kind,
StringRef(Blob.data(), Record[0]),
Record[1], Record[3],
File,
Range);
return ID;
}
}
llvm_unreachable("Invalid PreprocessorDetailRecordTypes");
}
/// Find the next module that contains entities and return the ID
/// of the first entry.
///
/// \param SLocMapI points at a chunk of a module that contains no
/// preprocessed entities or the entities it contains are not the ones we are
/// looking for.
PreprocessedEntityID ASTReader::findNextPreprocessedEntity(
GlobalSLocOffsetMapType::const_iterator SLocMapI) const {
++SLocMapI;
for (GlobalSLocOffsetMapType::const_iterator
EndI = GlobalSLocOffsetMap.end(); SLocMapI != EndI; ++SLocMapI) {
ModuleFile &M = *SLocMapI->second;
if (M.NumPreprocessedEntities)
return M.BasePreprocessedEntityID;
}
return getTotalNumPreprocessedEntities();
}
namespace {
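/// Comparator that orders preprocessed-entity offsets by their translated
/// begin location, for the searches in findPreprocessedEntity().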
struct PPEntityComp {
const ASTReader &Reader;
ModuleFile &M;
PPEntityComp(const ASTReader &Reader, ModuleFile &M) : Reader(Reader), M(M) {}
bool operator()(const PPEntityOffset &L, const PPEntityOffset &R) const {
SourceLocation LHS = getLoc(L);
SourceLocation RHS = getLoc(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(const PPEntityOffset &L, SourceLocation RHS) const {
SourceLocation LHS = getLoc(L);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(SourceLocation LHS, const PPEntityOffset &R) const {
SourceLocation RHS = getLoc(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
SourceLocation getLoc(const PPEntityOffset &PPE) const {
return Reader.TranslateSourceLocation(M, PPE.getBegin());
}
};
} // namespace
PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
bool EndsAfter) const {
if (SourceMgr.isLocalSourceLocation(Loc))
return getTotalNumPreprocessedEntities();
GlobalSLocOffsetMapType::const_iterator SLocMapI = GlobalSLocOffsetMap.find(
SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
"Corrupted global sloc offset map");
if (SLocMapI->second->NumPreprocessedEntities == 0)
return findNextPreprocessedEntity(SLocMapI);
ModuleFile &M = *SLocMapI->second;
using pp_iterator = const PPEntityOffset *;
pp_iterator pp_begin = M.PreprocessedEntityOffsets;
pp_iterator pp_end = pp_begin + M.NumPreprocessedEntities;
size_t Count = M.NumPreprocessedEntities;
size_t Half;
pp_iterator First = pp_begin;
pp_iterator PPI;
if (EndsAfter) {
PPI = std::upper_bound(pp_begin, pp_end, Loc,
PPEntityComp(*this, M));
} else {
// Do a binary search manually instead of using std::lower_bound because
// the end locations of entities may be unordered (when a macro expansion
// is inside another macro argument), but for this case it is not important
// whether we get the first macro expansion or its containing macro.
while (Count > 0) {
Half = Count / 2;
PPI = First;
std::advance(PPI, Half);
if (SourceMgr.isBeforeInTranslationUnit(
TranslateSourceLocation(M, PPI->getEnd()), Loc)) {
First = PPI;
++First;
Count = Count - Half - 1;
} else
Count = Half;
}
}
if (PPI == pp_end)
return findNextPreprocessedEntity(SLocMapI);
return M.BasePreprocessedEntityID + (PPI - pp_begin);
}
/// Returns a pair of [Begin, End) indices of preallocated
/// preprocessed entities that \arg Range encompasses.
std::pair<unsigned, unsigned>
ASTReader::findPreprocessedEntitiesInRange(SourceRange Range) {
if (Range.isInvalid())
return std::make_pair(0,0);
assert(!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),Range.getBegin()));
PreprocessedEntityID BeginID =
findPreprocessedEntity(Range.getBegin(), false);
PreprocessedEntityID EndID = findPreprocessedEntity(Range.getEnd(), true);
return std::make_pair(BeginID, EndID);
}
/// Returns whether the preallocated preprocessed entity with index
/// \arg Index came from file \arg FID, if that can be determined.
Optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
FileID FID) {
if (FID.isInvalid())
return false;
std::pair<ModuleFile *, unsigned> PPInfo = getModulePreprocessedEntity(Index);
ModuleFile &M = *PPInfo.first;
unsigned LocalIndex = PPInfo.second;
const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin());
if (Loc.isInvalid())
return false;
if (SourceMgr.isInFileID(SourceMgr.getFileLoc(Loc), FID))
return true;
else
return false;
}
namespace {
/// Visitor used to search for information about a header file.
class HeaderFileInfoVisitor {
const FileEntry *FE;
Optional<HeaderFileInfo> HFI;
public:
explicit HeaderFileInfoVisitor(const FileEntry *FE) : FE(FE) {}
bool operator()(ModuleFile &M) {
HeaderFileInfoLookupTable *Table
= static_cast<HeaderFileInfoLookupTable *>(M.HeaderFileInfoTable);
if (!Table)
return false;
// Look in the on-disk hash table for an entry for this file name.
HeaderFileInfoLookupTable::iterator Pos = Table->find(FE);
if (Pos == Table->end())
return false;
HFI = *Pos;
return true;
}
Optional<HeaderFileInfo> getHeaderFileInfo() const { return HFI; }
};
} // namespace
HeaderFileInfo ASTReader::GetHeaderFileInfo(const FileEntry *FE) {
HeaderFileInfoVisitor Visitor(FE);
ModuleMgr.visit(Visitor);
if (Optional<HeaderFileInfo> HFI = Visitor.getHeaderFileInfo())
return *HFI;
return HeaderFileInfo();
}
void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
using DiagState = DiagnosticsEngine::DiagState;
SmallVector<DiagState *, 32> DiagStates;
for (ModuleFile &F : ModuleMgr) {
unsigned Idx = 0;
auto &Record = F.PragmaDiagMappings;
if (Record.empty())
continue;
DiagStates.clear();
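// Each serialized DiagState is either a back-reference (a non-zero index
// into DiagStates) or a new state derived from BasedOn: a count followed
// by that many (diagnostic ID, serialized mapping) pairs.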
auto ReadDiagState =
[&](const DiagState &BasedOn, SourceLocation Loc,
bool IncludeNonPragmaStates) -> DiagnosticsEngine::DiagState * {
unsigned BackrefID = Record[Idx++];
if (BackrefID != 0)
return DiagStates[BackrefID - 1];
// A new DiagState was created here.
Diag.DiagStates.push_back(BasedOn);
DiagState *NewState = &Diag.DiagStates.back();
DiagStates.push_back(NewState);
unsigned Size = Record[Idx++];
assert(Idx + Size * 2 <= Record.size() &&
"Invalid data, not enough diag/map pairs");
while (Size--) {
unsigned DiagID = Record[Idx++];
DiagnosticMapping NewMapping =
DiagnosticMapping::deserialize(Record[Idx++]);
if (!NewMapping.isPragma() && !IncludeNonPragmaStates)
continue;
DiagnosticMapping &Mapping = NewState->getOrAddMapping(DiagID);
// If this mapping was specified as a warning but the severity was
// upgraded due to diagnostic settings, simulate the current diagnostic
// settings (and use a warning).
if (NewMapping.wasUpgradedFromWarning() && !Mapping.isErrorOrFatal()) {
NewMapping.setSeverity(diag::Severity::Warning);
NewMapping.setUpgradedFromWarning(false);
}
Mapping = NewMapping;
}
return NewState;
};
// Read the first state.
DiagState *FirstState;
if (F.Kind == MK_ImplicitModule) {
// Implicitly-built modules are reused with different diagnostic
// settings. Use the initial diagnostic state from Diag to simulate this
// compilation's diagnostic settings.
FirstState = Diag.DiagStatesByLoc.FirstDiagState;
DiagStates.push_back(FirstState);
// Skip the initial diagnostic state from the serialized module.
assert(Record[1] == 0 &&
"Invalid data, unexpected backref in initial state");
Idx = 3 + Record[2] * 2;
assert(Idx < Record.size() &&
"Invalid data, not enough state change pairs in initial state");
} else if (F.isModule()) {
// For an explicit module, preserve the flags from the module build
// command line (-w, -Weverything, -Werror, ...) along with any explicit
// -Wblah flags.
unsigned Flags = Record[Idx++];
DiagState Initial;
Initial.SuppressSystemWarnings = Flags & 1; Flags >>= 1;
Initial.ErrorsAsFatal = Flags & 1; Flags >>= 1;
Initial.WarningsAsErrors = Flags & 1; Flags >>= 1;
Initial.EnableAllWarnings = Flags & 1; Flags >>= 1;
Initial.IgnoreAllWarnings = Flags & 1; Flags >>= 1;
Initial.ExtBehavior = (diag::Severity)Flags;
FirstState = ReadDiagState(Initial, SourceLocation(), true);
assert(F.OriginalSourceFileID.isValid());
// Set up the root buffer of the module to start with the initial
// diagnostic state of the module itself, to cover files that contain no
// explicit transitions (for which we did not serialize anything).
Diag.DiagStatesByLoc.Files[F.OriginalSourceFileID]
.StateTransitions.push_back({FirstState, 0});
} else {
// For prefix ASTs, start with whatever the user configured on the
// command line.
Idx++; // Skip flags.
FirstState = ReadDiagState(*Diag.DiagStatesByLoc.CurDiagState,
SourceLocation(), false);
}
// Read the state transitions.
unsigned NumLocations = Record[Idx++];
while (NumLocations--) {
assert(Idx < Record.size() &&
"Invalid data, missing pragma diagnostic states");
SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]);
auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc);
assert(IDAndOffset.first.isValid() && "invalid FileID for transition");
assert(IDAndOffset.second == 0 && "not a start location for a FileID");
unsigned Transitions = Record[Idx++];
// Note that we don't need to set up Parent/ParentOffset here, because
// we won't be changing the diagnostic state within imported FileIDs
// (other than perhaps appending to the main source file, which has no
// parent).
auto &F = Diag.DiagStatesByLoc.Files[IDAndOffset.first];
F.StateTransitions.reserve(F.StateTransitions.size() + Transitions);
for (unsigned I = 0; I != Transitions; ++I) {
unsigned Offset = Record[Idx++];
auto *State =
ReadDiagState(*FirstState, Loc.getLocWithOffset(Offset), false);
F.StateTransitions.push_back({State, Offset});
}
}
// Read the final state.
assert(Idx < Record.size() &&
"Invalid data, missing final pragma diagnostic state");
SourceLocation CurStateLoc =
ReadSourceLocation(F, F.PragmaDiagMappings[Idx++]);
auto *CurState = ReadDiagState(*FirstState, CurStateLoc, false);
if (!F.isModule()) {
Diag.DiagStatesByLoc.CurDiagState = CurState;
Diag.DiagStatesByLoc.CurDiagStateLoc = CurStateLoc;
// Preserve the property that the imaginary root file describes the
// current state.
FileID NullFile;
auto &T = Diag.DiagStatesByLoc.Files[NullFile].StateTransitions;
if (T.empty())
T.push_back({CurState, 0});
else
T[0].State = CurState;
}
// Don't try to read these mappings again.
Record.clear();
}
}
/// Get the correct cursor and offset for loading a type.
ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) {
GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index);
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
ModuleFile *M = I->second;
return RecordLocation(
M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
M->DeclsBlockStartOffset);
}
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
switch (code) {
#define TYPE_BIT_CODE(CLASS_ID, CODE_ID, CODE_VALUE) \
case TYPE_##CODE_ID: return Type::CLASS_ID;
#include "clang/Serialization/TypeBitCodes.def"
default: return llvm::None;
}
}
/// Read and return the type with the given index.
///
/// The index is the type ID, shifted and minus the number of predefs. This
/// routine actually reads the record corresponding to the type at the given
/// location. It is a helper routine for GetType, which deals with reading type
/// IDs.
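///
/// TYPE_EXT_QUAL records are handled directly (a base type plus qualifiers);
/// all other type codes are mapped through getTypeClassForCode() and
/// dispatched to the generated AbstractTypeReader.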
QualType ASTReader::readTypeRecord(unsigned Index) {
assert(ContextObj && "reading type with no AST context");
ASTContext &Context = *ContextObj;
RecordLocation Loc = TypeCursorForIndex(Index);
BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this type.
SavedStreamPosition SavedPosition(DeclsCursor);
ReadingKindTracker ReadingKind(Read_Type, *this);
// Note that we are loading a type record.
Deserializing AType(this);
if (llvm::Error Err = DeclsCursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return QualType();
}
Expected<unsigned> RawCode = DeclsCursor.ReadCode();
if (!RawCode) {
Error(RawCode.takeError());
return QualType();
}
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> Code = Record.readRecord(DeclsCursor, RawCode.get());
if (!Code) {
Error(Code.takeError());
return QualType();
}
if (Code.get() == TYPE_EXT_QUAL) {
QualType baseType = Record.readQualType();
Qualifiers quals = Record.readQualifiers();
return Context.getQualifiedType(baseType, quals);
}
auto maybeClass = getTypeClassForCode((TypeCode) Code.get());
if (!maybeClass) {
Error("Unexpected code for type");
return QualType();
}
serialization::AbstractTypeReader<ASTRecordReader> TypeReader(Record);
return TypeReader.read(*maybeClass);
}
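// Reads the source-location information for every node in a TypeLoc chain,
// in the order the corresponding fields were serialized.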
namespace clang {
class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
ASTRecordReader &Reader;
SourceLocation readSourceLocation() {
return Reader.readSourceLocation();
}
TypeSourceInfo *GetTypeSourceInfo() {
return Reader.readTypeSourceInfo();
}
NestedNameSpecifierLoc ReadNestedNameSpecifierLoc() {
return Reader.readNestedNameSpecifierLoc();
}
Attr *ReadAttr() {
return Reader.readAttr();
}
public:
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}
// We want compile-time assurance that we've enumerated all of
// these, so unfortunately we have to declare them first, then
// define them out-of-line.
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
void Visit##CLASS##TypeLoc(CLASS##TypeLoc TyLoc);
#include "clang/AST/TypeLocNodes.def"
void VisitFunctionTypeLoc(FunctionTypeLoc);
void VisitArrayTypeLoc(ArrayTypeLoc);
};
} // namespace clang
void TypeLocReader::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
TL.setBuiltinLoc(readSourceLocation());
if (TL.needsExtraLocalData()) {
TL.setWrittenTypeSpec(static_cast<DeclSpec::TST>(Reader.readInt()));
TL.setWrittenSignSpec(static_cast<TypeSpecifierSign>(Reader.readInt()));
TL.setWrittenWidthSpec(static_cast<TypeSpecifierWidth>(Reader.readInt()));
TL.setModeAttr(Reader.readInt());
}
}
void TypeLocReader::VisitComplexTypeLoc(ComplexTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitPointerTypeLoc(PointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
}
void TypeLocReader::VisitDecayedTypeLoc(DecayedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing to do
}
void TypeLocReader::VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
TL.setExpansionLoc(readSourceLocation());
}
void TypeLocReader::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
TL.setCaretLoc(readSourceLocation());
}
void TypeLocReader::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
TL.setAmpLoc(readSourceLocation());
}
void TypeLocReader::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
TL.setAmpAmpLoc(readSourceLocation());
}
void TypeLocReader::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
TL.setClassTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitArrayTypeLoc(ArrayTypeLoc TL) {
TL.setLBracketLoc(readSourceLocation());
TL.setRBracketLoc(readSourceLocation());
if (Reader.readBool())
TL.setSizeExpr(Reader.readExpr());
else
TL.setSizeExpr(nullptr);
}
void TypeLocReader::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitIncompleteArrayTypeLoc(IncompleteArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitVariableArrayTypeLoc(VariableArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitDependentSizedArrayTypeLoc(
DependentSizedArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
}
void TypeLocReader::VisitDependentAddressSpaceTypeLoc(
DependentAddressSpaceTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrExprOperand(Reader.readExpr());
}
void TypeLocReader::VisitDependentSizedExtVectorTypeLoc(
DependentSizedExtVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitVectorTypeLoc(VectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentVectorTypeLoc(
DependentVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitConstantMatrixTypeLoc(ConstantMatrixTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrRowOperand(Reader.readExpr());
TL.setAttrColumnOperand(Reader.readExpr());
}
void TypeLocReader::VisitDependentSizedMatrixTypeLoc(
DependentSizedMatrixTypeLoc TL) {
TL.setAttrNameLoc(readSourceLocation());
TL.setAttrOperandParensRange(Reader.readSourceRange());
TL.setAttrRowOperand(Reader.readExpr());
TL.setAttrColumnOperand(Reader.readExpr());
}
void TypeLocReader::VisitFunctionTypeLoc(FunctionTypeLoc TL) {
TL.setLocalRangeBegin(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setExceptionSpecRange(Reader.readSourceRange());
TL.setLocalRangeEnd(readSourceLocation());
for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) {
TL.setParam(i, Reader.readDeclAs<ParmVarDecl>());
}
}
void TypeLocReader::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocReader::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocReader::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
TL.setTypeofLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
TL.setTypeofLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setUnderlyingTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
TL.setUnderlyingTInfo(GetTypeSourceInfo());
}
void TypeLocReader::VisitAutoTypeLoc(AutoTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
if (Reader.readBool()) {
TL.setNestedNameSpecifierLoc(ReadNestedNameSpecifierLoc());
TL.setTemplateKWLoc(readSourceLocation());
TL.setConceptNameLoc(readSourceLocation());
TL.setFoundDecl(Reader.readDeclAs<NamedDecl>());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(i, Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(i).getKind()));
}
}
void TypeLocReader::VisitDeducedTemplateSpecializationTypeLoc(
DeducedTemplateSpecializationTypeLoc TL) {
TL.setTemplateNameLoc(readSourceLocation());
}
void TypeLocReader::VisitRecordTypeLoc(RecordTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitEnumTypeLoc(EnumTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitAttributedTypeLoc(AttributedTypeLoc TL) {
TL.setAttr(ReadAttr());
}
void TypeLocReader::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitSubstTemplateTypeParmTypeLoc(
SubstTemplateTypeParmTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitSubstTemplateTypeParmPackTypeLoc(
SubstTemplateTypeParmPackTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitTemplateSpecializationTypeLoc(
TemplateSpecializationTypeLoc TL) {
TL.setTemplateKeywordLoc(readSourceLocation());
TL.setTemplateNameLoc(readSourceLocation());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
TL.setArgLocInfo(
i,
Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(i).getKind()));
}
void TypeLocReader::VisitParenTypeLoc(ParenTypeLoc TL) {
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
}
void TypeLocReader::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
TL.setElaboratedKeywordLoc(readSourceLocation());
TL.setQualifierLoc(ReadNestedNameSpecifierLoc());
TL.setTemplateKeywordLoc(readSourceLocation());
TL.setTemplateNameLoc(readSourceLocation());
TL.setLAngleLoc(readSourceLocation());
TL.setRAngleLoc(readSourceLocation());
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
TL.setArgLocInfo(
I,
Reader.readTemplateArgumentLocInfo(
TL.getTypePtr()->getArg(I).getKind()));
}
void TypeLocReader::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
TL.setEllipsisLoc(readSourceLocation());
}
void TypeLocReader::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) {
if (TL.getNumProtocols()) {
TL.setProtocolLAngleLoc(readSourceLocation());
TL.setProtocolRAngleLoc(readSourceLocation());
}
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
TL.setProtocolLoc(i, readSourceLocation());
}
void TypeLocReader::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
TL.setHasBaseTypeAsWritten(Reader.readBool());
TL.setTypeArgsLAngleLoc(readSourceLocation());
TL.setTypeArgsRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i)
TL.setTypeArgTInfo(i, GetTypeSourceInfo());
TL.setProtocolLAngleLoc(readSourceLocation());
TL.setProtocolRAngleLoc(readSourceLocation());
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
TL.setProtocolLoc(i, readSourceLocation());
}
void TypeLocReader::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
TL.setStarLoc(readSourceLocation());
}
void TypeLocReader::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
TL.setLParenLoc(readSourceLocation());
TL.setRParenLoc(readSourceLocation());
}
void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) {
TL.setKWLoc(readSourceLocation());
}
void TypeLocReader::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void TypeLocReader::VisitDependentExtIntTypeLoc(
clang::DependentExtIntTypeLoc TL) {
TL.setNameLoc(readSourceLocation());
}
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
TypeLocReader TLR(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLR.Visit(TL);
}
TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() {
QualType InfoTy = readType();
if (InfoTy.isNull())
return nullptr;
TypeSourceInfo *TInfo = getContext().CreateTypeSourceInfo(InfoTy);
readTypeLoc(TInfo->getTypeLoc());
return TInfo;
}
QualType ASTReader::GetType(TypeID ID) {
assert(ContextObj && "reading type with no AST context");
ASTContext &Context = *ContextObj;
unsigned FastQuals = ID & Qualifiers::FastMask;
unsigned Index = ID >> Qualifiers::FastWidth;
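// Type IDs pack the fast qualifiers into the low bits; the remaining bits
// index either a predefined type or an entry in TypesLoaded.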
if (Index < NUM_PREDEF_TYPE_IDS) {
QualType T;
switch ((PredefinedTypeIDs)Index) {
case PREDEF_TYPE_NULL_ID:
return QualType();
case PREDEF_TYPE_VOID_ID:
T = Context.VoidTy;
break;
case PREDEF_TYPE_BOOL_ID:
T = Context.BoolTy;
break;
case PREDEF_TYPE_CHAR_U_ID:
case PREDEF_TYPE_CHAR_S_ID:
// FIXME: Check that the signedness of CharTy is correct!
T = Context.CharTy;
break;
case PREDEF_TYPE_UCHAR_ID:
T = Context.UnsignedCharTy;
break;
case PREDEF_TYPE_USHORT_ID:
T = Context.UnsignedShortTy;
break;
case PREDEF_TYPE_UINT_ID:
T = Context.UnsignedIntTy;
break;
case PREDEF_TYPE_ULONG_ID:
T = Context.UnsignedLongTy;
break;
case PREDEF_TYPE_ULONGLONG_ID:
T = Context.UnsignedLongLongTy;
break;
case PREDEF_TYPE_UINT128_ID:
T = Context.UnsignedInt128Ty;
break;
case PREDEF_TYPE_SCHAR_ID:
T = Context.SignedCharTy;
break;
case PREDEF_TYPE_WCHAR_ID:
T = Context.WCharTy;
break;
case PREDEF_TYPE_SHORT_ID:
T = Context.ShortTy;
break;
case PREDEF_TYPE_INT_ID:
T = Context.IntTy;
break;
case PREDEF_TYPE_LONG_ID:
T = Context.LongTy;
break;
case PREDEF_TYPE_LONGLONG_ID:
T = Context.LongLongTy;
break;
case PREDEF_TYPE_INT128_ID:
T = Context.Int128Ty;
break;
case PREDEF_TYPE_BFLOAT16_ID:
T = Context.BFloat16Ty;
break;
case PREDEF_TYPE_HALF_ID:
T = Context.HalfTy;
break;
case PREDEF_TYPE_FLOAT_ID:
T = Context.FloatTy;
break;
case PREDEF_TYPE_DOUBLE_ID:
T = Context.DoubleTy;
break;
case PREDEF_TYPE_LONGDOUBLE_ID:
T = Context.LongDoubleTy;
break;
case PREDEF_TYPE_SHORT_ACCUM_ID:
T = Context.ShortAccumTy;
break;
case PREDEF_TYPE_ACCUM_ID:
T = Context.AccumTy;
break;
case PREDEF_TYPE_LONG_ACCUM_ID:
T = Context.LongAccumTy;
break;
case PREDEF_TYPE_USHORT_ACCUM_ID:
T = Context.UnsignedShortAccumTy;
break;
case PREDEF_TYPE_UACCUM_ID:
T = Context.UnsignedAccumTy;
break;
case PREDEF_TYPE_ULONG_ACCUM_ID:
T = Context.UnsignedLongAccumTy;
break;
case PREDEF_TYPE_SHORT_FRACT_ID:
T = Context.ShortFractTy;
break;
case PREDEF_TYPE_FRACT_ID:
T = Context.FractTy;
break;
case PREDEF_TYPE_LONG_FRACT_ID:
T = Context.LongFractTy;
break;
case PREDEF_TYPE_USHORT_FRACT_ID:
T = Context.UnsignedShortFractTy;
break;
case PREDEF_TYPE_UFRACT_ID:
T = Context.UnsignedFractTy;
break;
case PREDEF_TYPE_ULONG_FRACT_ID:
T = Context.UnsignedLongFractTy;
break;
case PREDEF_TYPE_SAT_SHORT_ACCUM_ID:
T = Context.SatShortAccumTy;
break;
case PREDEF_TYPE_SAT_ACCUM_ID:
T = Context.SatAccumTy;
break;
case PREDEF_TYPE_SAT_LONG_ACCUM_ID:
T = Context.SatLongAccumTy;
break;
case PREDEF_TYPE_SAT_USHORT_ACCUM_ID:
T = Context.SatUnsignedShortAccumTy;
break;
case PREDEF_TYPE_SAT_UACCUM_ID:
T = Context.SatUnsignedAccumTy;
break;
case PREDEF_TYPE_SAT_ULONG_ACCUM_ID:
T = Context.SatUnsignedLongAccumTy;
break;
case PREDEF_TYPE_SAT_SHORT_FRACT_ID:
T = Context.SatShortFractTy;
break;
case PREDEF_TYPE_SAT_FRACT_ID:
T = Context.SatFractTy;
break;
case PREDEF_TYPE_SAT_LONG_FRACT_ID:
T = Context.SatLongFractTy;
break;
case PREDEF_TYPE_SAT_USHORT_FRACT_ID:
T = Context.SatUnsignedShortFractTy;
break;
case PREDEF_TYPE_SAT_UFRACT_ID:
T = Context.SatUnsignedFractTy;
break;
case PREDEF_TYPE_SAT_ULONG_FRACT_ID:
T = Context.SatUnsignedLongFractTy;
break;
case PREDEF_TYPE_FLOAT16_ID:
T = Context.Float16Ty;
break;
case PREDEF_TYPE_FLOAT128_ID:
T = Context.Float128Ty;
break;
case PREDEF_TYPE_OVERLOAD_ID:
T = Context.OverloadTy;
break;
case PREDEF_TYPE_BOUND_MEMBER:
T = Context.BoundMemberTy;
break;
case PREDEF_TYPE_PSEUDO_OBJECT:
T = Context.PseudoObjectTy;
break;
case PREDEF_TYPE_DEPENDENT_ID:
T = Context.DependentTy;
break;
case PREDEF_TYPE_UNKNOWN_ANY:
T = Context.UnknownAnyTy;
break;
case PREDEF_TYPE_NULLPTR_ID:
T = Context.NullPtrTy;
break;
case PREDEF_TYPE_CHAR8_ID:
T = Context.Char8Ty;
break;
case PREDEF_TYPE_CHAR16_ID:
T = Context.Char16Ty;
break;
case PREDEF_TYPE_CHAR32_ID:
T = Context.Char32Ty;
break;
case PREDEF_TYPE_OBJC_ID:
T = Context.ObjCBuiltinIdTy;
break;
case PREDEF_TYPE_OBJC_CLASS:
T = Context.ObjCBuiltinClassTy;
break;
case PREDEF_TYPE_OBJC_SEL:
T = Context.ObjCBuiltinSelTy;
break;
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.Id##Ty; \
break;
#include "clang/Basic/OpenCLExtensionTypes.def"
case PREDEF_TYPE_SAMPLER_ID:
T = Context.OCLSamplerTy;
break;
case PREDEF_TYPE_EVENT_ID:
T = Context.OCLEventTy;
break;
case PREDEF_TYPE_CLK_EVENT_ID:
T = Context.OCLClkEventTy;
break;
case PREDEF_TYPE_QUEUE_ID:
T = Context.OCLQueueTy;
break;
case PREDEF_TYPE_RESERVE_ID_ID:
T = Context.OCLReserveIDTy;
break;
case PREDEF_TYPE_AUTO_DEDUCT:
T = Context.getAutoDeductType();
break;
case PREDEF_TYPE_AUTO_RREF_DEDUCT:
T = Context.getAutoRRefDeductType();
break;
case PREDEF_TYPE_ARC_UNBRIDGED_CAST:
T = Context.ARCUnbridgedCastTy;
break;
case PREDEF_TYPE_BUILTIN_FN:
T = Context.BuiltinFnTy;
break;
case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX:
T = Context.IncompleteMatrixIdxTy;
break;
case PREDEF_TYPE_OMP_ARRAY_SECTION:
T = Context.OMPArraySectionTy;
break;
case PREDEF_TYPE_OMP_ARRAY_SHAPING:
T = Context.OMPArrayShapingTy;
break;
case PREDEF_TYPE_OMP_ITERATOR:
T = Context.OMPIteratorTy;
break;
#define SVE_TYPE(Name, Id, SingletonId) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.Id##Ty; \
break;
#include "clang/Basic/PPCTypes.def"
#define RVV_TYPE(Name, Id, SingletonId) \
case PREDEF_TYPE_##Id##_ID: \
T = Context.SingletonId; \
break;
#include "clang/Basic/RISCVVTypes.def"
}
assert(!T.isNull() && "Unknown predefined type");
return T.withFastQualifiers(FastQuals);
}
Index -= NUM_PREDEF_TYPE_IDS;
assert(Index < TypesLoaded.size() && "Type index out-of-range");
if (TypesLoaded[Index].isNull()) {
TypesLoaded[Index] = readTypeRecord(Index);
if (TypesLoaded[Index].isNull())
return QualType();
TypesLoaded[Index]->setFromAST();
if (DeserializationListener)
DeserializationListener->TypeRead(TypeIdx::fromTypeID(ID),
TypesLoaded[Index]);
}
return TypesLoaded[Index].withFastQualifiers(FastQuals);
}
QualType ASTReader::getLocalType(ModuleFile &F, unsigned LocalID) {
return GetType(getGlobalTypeID(F, LocalID));
}
serialization::TypeID
ASTReader::getGlobalTypeID(ModuleFile &F, unsigned LocalID) const {
unsigned FastQuals = LocalID & Qualifiers::FastMask;
unsigned LocalIndex = LocalID >> Qualifiers::FastWidth;
if (LocalIndex < NUM_PREDEF_TYPE_IDS)
return LocalID;
if (!F.ModuleOffsetMap.empty())
ReadModuleOffsetMap(F);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= F.TypeRemap.find(LocalIndex - NUM_PREDEF_TYPE_IDS);
assert(I != F.TypeRemap.end() && "Invalid index into type index remap");
unsigned GlobalIndex = LocalIndex + I->second;
return (GlobalIndex << Qualifiers::FastWidth) | FastQuals;
}
TemplateArgumentLocInfo
ASTRecordReader::readTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind) {
switch (Kind) {
case TemplateArgument::Expression:
return readExpr();
case TemplateArgument::Type:
return readTypeSourceInfo();
case TemplateArgument::Template: {
NestedNameSpecifierLoc QualifierLoc =
readNestedNameSpecifierLoc();
SourceLocation TemplateNameLoc = readSourceLocation();
return TemplateArgumentLocInfo(getASTContext(), QualifierLoc,
TemplateNameLoc, SourceLocation());
}
case TemplateArgument::TemplateExpansion: {
NestedNameSpecifierLoc QualifierLoc = readNestedNameSpecifierLoc();
SourceLocation TemplateNameLoc = readSourceLocation();
SourceLocation EllipsisLoc = readSourceLocation();
return TemplateArgumentLocInfo(getASTContext(), QualifierLoc,
TemplateNameLoc, EllipsisLoc);
}
case TemplateArgument::Null:
case TemplateArgument::Integral:
case TemplateArgument::Declaration:
case TemplateArgument::NullPtr:
case TemplateArgument::Pack:
// FIXME: Is this right?
return TemplateArgumentLocInfo();
}
llvm_unreachable("unexpected template argument loc");
}
TemplateArgumentLoc ASTRecordReader::readTemplateArgumentLoc() {
TemplateArgument Arg = readTemplateArgument();
if (Arg.getKind() == TemplateArgument::Expression) {
if (readBool()) // bool InfoHasSameExpr.
return TemplateArgumentLoc(Arg, TemplateArgumentLocInfo(Arg.getAsExpr()));
}
return TemplateArgumentLoc(Arg, readTemplateArgumentLocInfo(Arg.getKind()));
}
const ASTTemplateArgumentListInfo *
ASTRecordReader::readASTTemplateArgumentListInfo() {
SourceLocation LAngleLoc = readSourceLocation();
SourceLocation RAngleLoc = readSourceLocation();
unsigned NumArgsAsWritten = readInt();
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i = 0; i != NumArgsAsWritten; ++i)
TemplArgsInfo.addArgument(readTemplateArgumentLoc());
return ASTTemplateArgumentListInfo::Create(getContext(), TemplArgsInfo);
}
Decl *ASTReader::GetExternalDecl(uint32_t ID) {
return GetDecl(ID);
}
void ASTReader::CompleteRedeclChain(const Decl *D) {
if (NumCurrentElementsDeserializing) {
// We arrange to not care about the complete redeclaration chain while we're
// deserializing. Just remember that the AST has marked this one as complete
// but that it's not actually complete yet, so we know we still need to
// complete it later.
PendingIncompleteDeclChains.push_back(const_cast<Decl*>(D));
return;
}
if (!D->getDeclContext()) {
assert(isa<TranslationUnitDecl>(D) && "Not a TU?");
return;
}
const DeclContext *DC = D->getDeclContext()->getRedeclContext();
// If this is a named declaration, complete it by looking it up
// within its context.
//
// FIXME: Merging a function definition should merge
// all mergeable entities within it.
if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
isa<CXXRecordDecl>(DC) || isa<EnumDecl>(DC)) {
if (DeclarationName Name = cast<NamedDecl>(D)->getDeclName()) {
if (!getContext().getLangOpts().CPlusPlus &&
isa<TranslationUnitDecl>(DC)) {
// Outside of C++, we don't have a lookup table for the TU, so update
// the identifier instead. (For C++ modules, we don't store decls
// in the serialized identifier table, so we do the lookup in the TU.)
auto *II = Name.getAsIdentifierInfo();
assert(II && "non-identifier name in C?");
if (II->isOutOfDate())
updateOutOfDateIdentifier(*II);
} else
DC->lookup(Name);
} else if (needsAnonymousDeclarationNumber(cast<NamedDecl>(D))) {
// Find all declarations of this kind from the relevant context.
for (auto *DCDecl : cast<Decl>(D->getLexicalDeclContext())->redecls()) {
auto *DC = cast<DeclContext>(DCDecl);
SmallVector<Decl*, 8> Decls;
FindExternalLexicalDecls(
DC, [&](Decl::Kind K) { return K == D->getKind(); }, Decls);
}
}
}
if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D))
CTSD->getSpecializedTemplate()->LoadLazySpecializations();
if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(D))
VTSD->getSpecializedTemplate()->LoadLazySpecializations();
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
if (auto *Template = FD->getPrimaryTemplate())
Template->LoadLazySpecializations();
}
}
CXXCtorInitializer **
ASTReader::GetExternalCXXCtorInitializers(uint64_t Offset) {
RecordLocation Loc = getLocalBitOffset(Offset);
BitstreamCursor &Cursor = Loc.F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
ReadingKindTracker ReadingKind(Read_Decl, *this);
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> MaybeRecCode = Record.readRecord(Cursor, Code);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
if (MaybeRecCode.get() != DECL_CXX_CTOR_INITIALIZERS) {
Error("malformed AST file: missing C++ ctor initializers");
return nullptr;
}
return Record.readCXXCtorInitializers();
}
CXXBaseSpecifier *ASTReader::GetExternalCXXBaseSpecifiers(uint64_t Offset) {
assert(ContextObj && "reading base specifiers with no AST context");
ASTContext &Context = *ContextObj;
RecordLocation Loc = getLocalBitOffset(Offset);
BitstreamCursor &Cursor = Loc.F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error Err = Cursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
ReadingKindTracker ReadingKind(Read_Decl, *this);
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode) {
Error(MaybeCode.takeError());
return nullptr;
}
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *Loc.F);
Expected<unsigned> MaybeRecCode = Record.readRecord(Cursor, Code);
if (!MaybeRecCode) {
Error(MaybeRecCode.takeError());
return nullptr;
}
unsigned RecCode = MaybeRecCode.get();
if (RecCode != DECL_CXX_BASE_SPECIFIERS) {
Error("malformed AST file: missing C++ base specifiers");
return nullptr;
}
unsigned NumBases = Record.readInt();
void *Mem = Context.Allocate(sizeof(CXXBaseSpecifier) * NumBases);
CXXBaseSpecifier *Bases = new (Mem) CXXBaseSpecifier [NumBases];
for (unsigned I = 0; I != NumBases; ++I)
Bases[I] = Record.readCXXBaseSpecifier();
return Bases;
}
serialization::DeclID
ASTReader::getGlobalDeclID(ModuleFile &F, LocalDeclID LocalID) const {
if (LocalID < NUM_PREDEF_DECL_IDS)
return LocalID;
if (!F.ModuleOffsetMap.empty())
ReadModuleOffsetMap(F);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= F.DeclRemap.find(LocalID - NUM_PREDEF_DECL_IDS);
assert(I != F.DeclRemap.end() && "Invalid index into decl index remap");
return LocalID + I->second;
}
bool ASTReader::isDeclIDFromModule(serialization::GlobalDeclID ID,
ModuleFile &M) const {
// Predefined decls aren't from any module.
if (ID < NUM_PREDEF_DECL_IDS)
return false;
return ID - NUM_PREDEF_DECL_IDS >= M.BaseDeclID &&
ID - NUM_PREDEF_DECL_IDS < M.BaseDeclID + M.LocalNumDecls;
}
ModuleFile *ASTReader::getOwningModuleFile(const Decl *D) {
if (!D->isFromASTFile())
return nullptr;
GlobalDeclMapType::const_iterator I = GlobalDeclMap.find(D->getGlobalID());
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
return I->second;
}
SourceLocation ASTReader::getSourceLocationForDeclID(GlobalDeclID ID) {
if (ID < NUM_PREDEF_DECL_IDS)
return SourceLocation();
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
Error("declaration ID out-of-range for AST file");
return SourceLocation();
}
if (Decl *D = DeclsLoaded[Index])
return D->getLocation();
SourceLocation Loc;
DeclCursorForID(ID, Loc);
return Loc;
}
static Decl *getPredefinedDecl(ASTContext &Context, PredefinedDeclIDs ID) {
switch (ID) {
case PREDEF_DECL_NULL_ID:
return nullptr;
case PREDEF_DECL_TRANSLATION_UNIT_ID:
return Context.getTranslationUnitDecl();
case PREDEF_DECL_OBJC_ID_ID:
return Context.getObjCIdDecl();
case PREDEF_DECL_OBJC_SEL_ID:
return Context.getObjCSelDecl();
case PREDEF_DECL_OBJC_CLASS_ID:
return Context.getObjCClassDecl();
case PREDEF_DECL_OBJC_PROTOCOL_ID:
return Context.getObjCProtocolDecl();
case PREDEF_DECL_INT_128_ID:
return Context.getInt128Decl();
case PREDEF_DECL_UNSIGNED_INT_128_ID:
return Context.getUInt128Decl();
case PREDEF_DECL_OBJC_INSTANCETYPE_ID:
return Context.getObjCInstanceTypeDecl();
case PREDEF_DECL_BUILTIN_VA_LIST_ID:
return Context.getBuiltinVaListDecl();
case PREDEF_DECL_VA_LIST_TAG:
return Context.getVaListTagDecl();
case PREDEF_DECL_BUILTIN_MS_VA_LIST_ID:
return Context.getBuiltinMSVaListDecl();
case PREDEF_DECL_BUILTIN_MS_GUID_ID:
return Context.getMSGuidTagDecl();
case PREDEF_DECL_EXTERN_C_CONTEXT_ID:
return Context.getExternCContextDecl();
case PREDEF_DECL_MAKE_INTEGER_SEQ_ID:
return Context.getMakeIntegerSeqDecl();
case PREDEF_DECL_CF_CONSTANT_STRING_ID:
return Context.getCFConstantStringDecl();
case PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID:
return Context.getCFConstantStringTagDecl();
case PREDEF_DECL_TYPE_PACK_ELEMENT_ID:
return Context.getTypePackElementDecl();
}
llvm_unreachable("PredefinedDeclIDs unknown enum value");
}
Decl *ASTReader::GetExistingDecl(DeclID ID) {
assert(ContextObj && "reading decl with no AST context");
if (ID < NUM_PREDEF_DECL_IDS) {
Decl *D = getPredefinedDecl(*ContextObj, (PredefinedDeclIDs)ID);
if (D) {
// Track that we have merged the declaration with ID \p ID into the
// pre-existing predefined declaration \p D.
auto &Merged = KeyDecls[D->getCanonicalDecl()];
if (Merged.empty())
Merged.push_back(ID);
}
return D;
}
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
assert(0 && "declaration ID out-of-range for AST file");
Error("declaration ID out-of-range for AST file");
return nullptr;
}
return DeclsLoaded[Index];
}
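// Unlike GetExistingDecl(), GetDecl() deserializes on demand: a null slot in
// DeclsLoaded triggers ReadDeclRecord() and a listener notification.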
Decl *ASTReader::GetDecl(DeclID ID) {
if (ID < NUM_PREDEF_DECL_IDS)
return GetExistingDecl(ID);
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
if (Index >= DeclsLoaded.size()) {
assert(0 && "declaration ID out-of-range for AST file");
Error("declaration ID out-of-range for AST file");
return nullptr;
}
if (!DeclsLoaded[Index]) {
ReadDeclRecord(ID);
if (DeserializationListener)
DeserializationListener->DeclRead(ID, DeclsLoaded[Index]);
}
return DeclsLoaded[Index];
}
DeclID ASTReader::mapGlobalIDToModuleFileGlobalID(ModuleFile &M,
DeclID GlobalID) {
if (GlobalID < NUM_PREDEF_DECL_IDS)
return GlobalID;
GlobalDeclMapType::const_iterator I = GlobalDeclMap.find(GlobalID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *Owner = I->second;
llvm::DenseMap<ModuleFile *, serialization::DeclID>::iterator Pos
= M.GlobalToLocalDeclIDs.find(Owner);
if (Pos == M.GlobalToLocalDeclIDs.end())
return 0;
return GlobalID - Owner->BaseDeclID + Pos->second;
}
serialization::DeclID ASTReader::ReadDeclID(ModuleFile &F,
const RecordData &Record,
unsigned &Idx) {
if (Idx >= Record.size()) {
Error("Corrupted AST file");
return 0;
}
return getGlobalDeclID(F, Record[Idx++]);
}
/// Resolve the offset of a statement into a statement.
///
/// This operation will read a new statement from the external
/// source each time it is called, and is meant to be used via a
/// LazyOffsetPtr (which is used by Decls for the body of functions, etc).
Stmt *ASTReader::GetExternalDeclStmt(uint64_t Offset) {
// Switch case IDs are per Decl.
ClearSwitchCaseIDs();
// Offset here is a global offset across the entire chain.
RecordLocation Loc = getLocalBitOffset(Offset);
if (llvm::Error Err = Loc.F->DeclsCursor.JumpToBit(Loc.Offset)) {
Error(std::move(Err));
return nullptr;
}
assert(NumCurrentElementsDeserializing == 0 &&
"should not be called while already deserializing");
Deserializing D(this);
return ReadStmtFromStream(*Loc.F);
}
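// Lexical contents are stored as a flat array of (Decl::Kind, DeclID) pairs,
// so the visitor below walks the array two entries at a time.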
void ASTReader::FindExternalLexicalDecls(
const DeclContext *DC, llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
SmallVectorImpl<Decl *> &Decls) {
bool PredefsVisited[NUM_PREDEF_DECL_IDS] = {};
auto Visit = [&] (ModuleFile *M, LexicalContents LexicalDecls) {
assert(LexicalDecls.size() % 2 == 0 && "expected an even number of entries");
for (int I = 0, N = LexicalDecls.size(); I != N; I += 2) {
auto K = (Decl::Kind)+LexicalDecls[I];
if (!IsKindWeWant(K))
continue;
auto ID = (serialization::DeclID)+LexicalDecls[I + 1];
// Don't add predefined declarations to the lexical context more
// than once.
if (ID < NUM_PREDEF_DECL_IDS) {
if (PredefsVisited[ID])
continue;
PredefsVisited[ID] = true;
}
if (Decl *D = GetLocalDecl(*M, ID)) {
assert(D->getKind() == K && "wrong kind for lexical decl");
if (!DC->isDeclInLexicalTraversal(D))
Decls.push_back(D);
}
}
};
if (isa<TranslationUnitDecl>(DC)) {
for (auto Lexical : TULexicalDecls)
Visit(Lexical.first, Lexical.second);
} else {
auto I = LexicalDecls.find(DC);
if (I != LexicalDecls.end())
Visit(I->second.first, I->second.second);
}
++NumLexicalDeclContextsRead;
}
namespace {
class DeclIDComp {
ASTReader &Reader;
ModuleFile &Mod;
public:
DeclIDComp(ASTReader &Reader, ModuleFile &M) : Reader(Reader), Mod(M) {}
bool operator()(LocalDeclID L, LocalDeclID R) const {
SourceLocation LHS = getLocation(L);
SourceLocation RHS = getLocation(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(SourceLocation LHS, LocalDeclID R) const {
SourceLocation RHS = getLocation(R);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
bool operator()(LocalDeclID L, SourceLocation RHS) const {
SourceLocation LHS = getLocation(L);
return Reader.getSourceManager().isBeforeInTranslationUnit(LHS, RHS);
}
SourceLocation getLocation(LocalDeclID ID) const {
return Reader.getSourceManager().getFileLoc(
Reader.getSourceLocationForDeclID(Reader.getGlobalDeclID(Mod, ID)));
}
};
} // namespace
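// FindFileRegionDecls relies on the decls of a file being recorded in source
// order: DeclIDComp orders local decl IDs by their file location, which lets
// us binary-search (lower_bound/upper_bound) DInfo.Decls for the IDs that
// overlap [Offset, Offset+Length).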
void ASTReader::FindFileRegionDecls(FileID File,
unsigned Offset, unsigned Length,
SmallVectorImpl<Decl *> &Decls) {
SourceManager &SM = getSourceManager();
llvm::DenseMap<FileID, FileDeclsInfo>::iterator I = FileDeclIDs.find(File);
if (I == FileDeclIDs.end())
return;
FileDeclsInfo &DInfo = I->second;
if (DInfo.Decls.empty())
return;
SourceLocation
BeginLoc = SM.getLocForStartOfFile(File).getLocWithOffset(Offset);
SourceLocation EndLoc = BeginLoc.getLocWithOffset(Length);
DeclIDComp DIDComp(*this, *DInfo.Mod);
ArrayRef<serialization::LocalDeclID>::iterator BeginIt =
llvm::lower_bound(DInfo.Decls, BeginLoc, DIDComp);
if (BeginIt != DInfo.Decls.begin())
--BeginIt;
// If we are pointing at a top-level decl inside an objc container, we need
// to backtrack until we find it; otherwise we will fail to report that the
// region overlaps with an objc container.
while (BeginIt != DInfo.Decls.begin() &&
GetDecl(getGlobalDeclID(*DInfo.Mod, *BeginIt))
->isTopLevelDeclInObjCContainer())
--BeginIt;
ArrayRef<serialization::LocalDeclID>::iterator EndIt =
llvm::upper_bound(DInfo.Decls, EndLoc, DIDComp);
if (EndIt != DInfo.Decls.end())
++EndIt;
for (ArrayRef<serialization::LocalDeclID>::iterator
DIt = BeginIt; DIt != EndIt; ++DIt)
Decls.push_back(GetDecl(getGlobalDeclID(*DInfo.Mod, *DIt)));
}
bool
ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
DeclarationName Name) {
assert(DC->hasExternalVisibleStorage() && DC == DC->getPrimaryContext() &&
"DeclContext has no visible decls in storage");
if (!Name)
return false;
auto It = Lookups.find(DC);
if (It == Lookups.end())
return false;
Deserializing LookupResults(this);
// Load the list of declarations.
SmallVector<NamedDecl *, 64> Decls;
llvm::SmallPtrSet<NamedDecl *, 8> Found;
for (DeclID ID : It->second.Table.find(Name)) {
NamedDecl *ND = cast<NamedDecl>(GetDecl(ID));
if (ND->getDeclName() == Name && Found.insert(ND).second)
Decls.push_back(ND);
}
++NumVisibleDeclContextsRead;
SetExternalVisibleDeclsForName(DC, Name, Decls);
return !Decls.empty();
}
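// Unlike the single-name lookup above, completeVisibleDeclsMap deserializes
// every name in the on-disk lookup table for this context, after which the
// context no longer needs external visible storage.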
void ASTReader::completeVisibleDeclsMap(const DeclContext *DC) {
if (!DC->hasExternalVisibleStorage())
return;
auto It = Lookups.find(DC);
assert(It != Lookups.end() &&
"have external visible storage but no lookup tables");
DeclsMap Decls;
for (DeclID ID : It->second.Table.findAll()) {
NamedDecl *ND = cast<NamedDecl>(GetDecl(ID));
Decls[ND->getDeclName()].push_back(ND);
}
++NumVisibleDeclContextsRead;
for (DeclsMap::iterator I = Decls.begin(), E = Decls.end(); I != E; ++I) {
SetExternalVisibleDeclsForName(DC, I->first, I->second);
}
const_cast<DeclContext *>(DC)->setHasExternalVisibleStorage(false);
}
const serialization::reader::DeclContextLookupTable *
ASTReader::getLoadedLookupTables(DeclContext *Primary) const {
auto I = Lookups.find(Primary);
return I == Lookups.end() ? nullptr : &I->second;
}
/// Under non-PCH compilation the consumer receives the ObjC methods
/// before receiving the implementation, and codegen depends on this.
/// We simulate this by deserializing the methods of the implementation
/// and passing them to the consumer before passing the deserialized
/// implementation decl itself.
static void PassObjCImplDeclToConsumer(ObjCImplDecl *ImplD,
ASTConsumer *Consumer) {
assert(ImplD && Consumer);
for (auto *I : ImplD->methods())
Consumer->HandleInterestingDecl(DeclGroupRef(I));
Consumer->HandleInterestingDecl(DeclGroupRef(ImplD));
}
void ASTReader::PassInterestingDeclToConsumer(Decl *D) {
if (ObjCImplDecl *ImplD = dyn_cast<ObjCImplDecl>(D))
PassObjCImplDeclToConsumer(ImplD, Consumer);
else
Consumer->HandleInterestingDecl(DeclGroupRef(D));
}
void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) {
this->Consumer = Consumer;
if (Consumer)
PassInterestingDeclsToConsumer();
if (DeserializationListener)
DeserializationListener->ReaderInitialized(this);
}
void ASTReader::PrintStats() {
std::fprintf(stderr, "*** AST File Statistics:\n");
unsigned NumTypesLoaded
= TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(),
QualType());
unsigned NumDeclsLoaded
= DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(),
(Decl *)nullptr);
unsigned NumIdentifiersLoaded
= IdentifiersLoaded.size() - std::count(IdentifiersLoaded.begin(),
IdentifiersLoaded.end(),
(IdentifierInfo *)nullptr);
unsigned NumMacrosLoaded
= MacrosLoaded.size() - std::count(MacrosLoaded.begin(),
MacrosLoaded.end(),
(MacroInfo *)nullptr);
unsigned NumSelectorsLoaded
= SelectorsLoaded.size() - std::count(SelectorsLoaded.begin(),
SelectorsLoaded.end(),
Selector());
if (unsigned TotalNumSLocEntries = getTotalNumSLocs())
std::fprintf(stderr, " %u/%u source location entries read (%f%%)\n",
NumSLocEntriesRead, TotalNumSLocEntries,
((float)NumSLocEntriesRead/TotalNumSLocEntries * 100));
if (!TypesLoaded.empty())
std::fprintf(stderr, " %u/%u types read (%f%%)\n",
NumTypesLoaded, (unsigned)TypesLoaded.size(),
((float)NumTypesLoaded/TypesLoaded.size() * 100));
if (!DeclsLoaded.empty())
std::fprintf(stderr, " %u/%u declarations read (%f%%)\n",
NumDeclsLoaded, (unsigned)DeclsLoaded.size(),
((float)NumDeclsLoaded/DeclsLoaded.size() * 100));
if (!IdentifiersLoaded.empty())
std::fprintf(stderr, " %u/%u identifiers read (%f%%)\n",
NumIdentifiersLoaded, (unsigned)IdentifiersLoaded.size(),
((float)NumIdentifiersLoaded/IdentifiersLoaded.size() * 100));
if (!MacrosLoaded.empty())
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosLoaded, (unsigned)MacrosLoaded.size(),
((float)NumMacrosLoaded/MacrosLoaded.size() * 100));
if (!SelectorsLoaded.empty())
std::fprintf(stderr, " %u/%u selectors read (%f%%)\n",
NumSelectorsLoaded, (unsigned)SelectorsLoaded.size(),
((float)NumSelectorsLoaded/SelectorsLoaded.size() * 100));
if (TotalNumStatements)
std::fprintf(stderr, " %u/%u statements read (%f%%)\n",
NumStatementsRead, TotalNumStatements,
((float)NumStatementsRead/TotalNumStatements * 100));
if (TotalNumMacros)
std::fprintf(stderr, " %u/%u macros read (%f%%)\n",
NumMacrosRead, TotalNumMacros,
((float)NumMacrosRead/TotalNumMacros * 100));
if (TotalLexicalDeclContexts)
std::fprintf(stderr, " %u/%u lexical declcontexts read (%f%%)\n",
NumLexicalDeclContextsRead, TotalLexicalDeclContexts,
((float)NumLexicalDeclContextsRead/TotalLexicalDeclContexts
* 100));
if (TotalVisibleDeclContexts)
std::fprintf(stderr, " %u/%u visible declcontexts read (%f%%)\n",
NumVisibleDeclContextsRead, TotalVisibleDeclContexts,
((float)NumVisibleDeclContextsRead/TotalVisibleDeclContexts
* 100));
if (TotalNumMethodPoolEntries)
std::fprintf(stderr, " %u/%u method pool entries read (%f%%)\n",
NumMethodPoolEntriesRead, TotalNumMethodPoolEntries,
((float)NumMethodPoolEntriesRead/TotalNumMethodPoolEntries
* 100));
if (NumMethodPoolLookups)
std::fprintf(stderr, " %u/%u method pool lookups succeeded (%f%%)\n",
NumMethodPoolHits, NumMethodPoolLookups,
((float)NumMethodPoolHits/NumMethodPoolLookups * 100.0));
if (NumMethodPoolTableLookups)
std::fprintf(stderr, " %u/%u method pool table lookups succeeded (%f%%)\n",
NumMethodPoolTableHits, NumMethodPoolTableLookups,
((float)NumMethodPoolTableHits/NumMethodPoolTableLookups
* 100.0));
if (NumIdentifierLookupHits)
std::fprintf(stderr,
" %u / %u identifier table lookups succeeded (%f%%)\n",
NumIdentifierLookupHits, NumIdentifierLookups,
(double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
if (GlobalIndex) {
std::fprintf(stderr, "\n");
GlobalIndex->printStats();
}
std::fprintf(stderr, "\n");
dump();
std::fprintf(stderr, "\n");
}
template<typename Key, typename ModuleFile, unsigned InitialCapacity>
LLVM_DUMP_METHOD static void
dumpModuleIDMap(StringRef Name,
const ContinuousRangeMap<Key, ModuleFile *,
InitialCapacity> &Map) {
if (Map.begin() == Map.end())
return;
using MapType = ContinuousRangeMap<Key, ModuleFile *, InitialCapacity>;
llvm::errs() << Name << ":\n";
for (typename MapType::const_iterator I = Map.begin(), IEnd = Map.end();
I != IEnd; ++I) {
llvm::errs() << " " << I->first << " -> " << I->second->FileName
<< "\n";
}
}
LLVM_DUMP_METHOD void ASTReader::dump() {
llvm::errs() << "*** PCH/ModuleFile Remappings:\n";
dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap);
dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap);
dumpModuleIDMap("Global type map", GlobalTypeMap);
dumpModuleIDMap("Global declaration map", GlobalDeclMap);
dumpModuleIDMap("Global identifier map", GlobalIdentifierMap);
dumpModuleIDMap("Global macro map", GlobalMacroMap);
dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap);
dumpModuleIDMap("Global selector map", GlobalSelectorMap);
dumpModuleIDMap("Global preprocessed entity map",
GlobalPreprocessedEntityMap);
llvm::errs() << "\n*** PCH/Modules Loaded:";
for (ModuleFile &M : ModuleMgr)
M.dump();
}
/// Return the amount of memory used by memory buffers, breaking down
/// by heap-backed versus mmap'ed memory.
void ASTReader::getMemoryBufferSizes(MemoryBufferSizes &sizes) const {
for (ModuleFile &I : ModuleMgr) {
if (llvm::MemoryBuffer *buf = I.Buffer) {
size_t bytes = buf->getBufferSize();
switch (buf->getBufferKind()) {
case llvm::MemoryBuffer::MemoryBuffer_Malloc:
sizes.malloc_bytes += bytes;
break;
case llvm::MemoryBuffer::MemoryBuffer_MMap:
sizes.mmap_bytes += bytes;
break;
}
}
}
}
void ASTReader::InitializeSema(Sema &S) {
SemaObj = &S;
S.addExternalSource(this);
// Makes sure any declarations that were deserialized "too early"
// still get added to the identifier's declaration chains.
for (uint64_t ID : PreloadedDeclIDs) {
NamedDecl *D = cast<NamedDecl>(GetDecl(ID));
pushExternalDeclIntoScope(D, D->getDeclName());
}
PreloadedDeclIDs.clear();
// FIXME: What happens if these are changed by a module import?
if (!FPPragmaOptions.empty()) {
assert(FPPragmaOptions.size() == 1 && "Wrong number of FP_PRAGMA_OPTIONS");
FPOptionsOverride NewOverrides =
FPOptionsOverride::getFromOpaqueInt(FPPragmaOptions[0]);
SemaObj->CurFPFeatures =
NewOverrides.applyOverrides(SemaObj->getLangOpts());
}
SemaObj->OpenCLFeatures = OpenCLExtensions;
UpdateSema();
}
void ASTReader::UpdateSema() {
assert(SemaObj && "no Sema to update");
// Load the offsets of the declarations that Sema references.
// They will be lazily deserialized when needed.
if (!SemaDeclRefs.empty()) {
assert(SemaDeclRefs.size() % 3 == 0);
for (unsigned I = 0; I != SemaDeclRefs.size(); I += 3) {
if (!SemaObj->StdNamespace)
SemaObj->StdNamespace = SemaDeclRefs[I];
if (!SemaObj->StdBadAlloc)
SemaObj->StdBadAlloc = SemaDeclRefs[I+1];
if (!SemaObj->StdAlignValT)
SemaObj->StdAlignValT = SemaDeclRefs[I+2];
}
SemaDeclRefs.clear();
}
// Update the state of pragmas. Use the same API as if we had encountered the
// pragma in the source.
if (OptimizeOffPragmaLocation.isValid())
SemaObj->ActOnPragmaOptimize(/* On = */ false, OptimizeOffPragmaLocation);
if (PragmaMSStructState != -1)
SemaObj->ActOnPragmaMSStruct((PragmaMSStructKind)PragmaMSStructState);
if (PointersToMembersPragmaLocation.isValid()) {
SemaObj->ActOnPragmaMSPointersToMembers(
(LangOptions::PragmaMSPointersToMembersKind)
PragmaMSPointersToMembersState,
PointersToMembersPragmaLocation);
}
SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth;
if (PragmaAlignPackCurrentValue) {
// The bottom of the stack might have a default value. It must be adjusted
// to the current value to ensure that the packing state is preserved after
// popping entries that were included/imported from a PCH/module.
bool DropFirst = false;
if (!PragmaAlignPackStack.empty() &&
PragmaAlignPackStack.front().Location.isInvalid()) {
assert(PragmaAlignPackStack.front().Value ==
SemaObj->AlignPackStack.DefaultValue &&
"Expected a default alignment value");
SemaObj->AlignPackStack.Stack.emplace_back(
PragmaAlignPackStack.front().SlotLabel,
SemaObj->AlignPackStack.CurrentValue,
SemaObj->AlignPackStack.CurrentPragmaLocation,
PragmaAlignPackStack.front().PushLocation);
DropFirst = true;
}
for (const auto &Entry : llvm::makeArrayRef(PragmaAlignPackStack)
.drop_front(DropFirst ? 1 : 0)) {
SemaObj->AlignPackStack.Stack.emplace_back(
Entry.SlotLabel, Entry.Value, Entry.Location, Entry.PushLocation);
}
if (PragmaAlignPackCurrentLocation.isInvalid()) {
assert(*PragmaAlignPackCurrentValue ==
SemaObj->AlignPackStack.DefaultValue &&
"Expected a default align and pack value");
// Keep the current values.
} else {
SemaObj->AlignPackStack.CurrentValue = *PragmaAlignPackCurrentValue;
SemaObj->AlignPackStack.CurrentPragmaLocation =
PragmaAlignPackCurrentLocation;
}
}
if (FpPragmaCurrentValue) {
// The bottom of the stack might have a default value. It must be adjusted
// to the current value to ensure that fp-pragma state is preserved after
// popping entries that were included/imported from a PCH/module.
bool DropFirst = false;
if (!FpPragmaStack.empty() && FpPragmaStack.front().Location.isInvalid()) {
assert(FpPragmaStack.front().Value ==
SemaObj->FpPragmaStack.DefaultValue &&
"Expected a default pragma float_control value");
SemaObj->FpPragmaStack.Stack.emplace_back(
FpPragmaStack.front().SlotLabel, SemaObj->FpPragmaStack.CurrentValue,
SemaObj->FpPragmaStack.CurrentPragmaLocation,
FpPragmaStack.front().PushLocation);
DropFirst = true;
}
for (const auto &Entry :
llvm::makeArrayRef(FpPragmaStack).drop_front(DropFirst ? 1 : 0))
SemaObj->FpPragmaStack.Stack.emplace_back(
Entry.SlotLabel, Entry.Value, Entry.Location, Entry.PushLocation);
if (FpPragmaCurrentLocation.isInvalid()) {
assert(*FpPragmaCurrentValue == SemaObj->FpPragmaStack.DefaultValue &&
"Expected a default pragma float_control value");
// Keep the current values.
} else {
SemaObj->FpPragmaStack.CurrentValue = *FpPragmaCurrentValue;
SemaObj->FpPragmaStack.CurrentPragmaLocation = FpPragmaCurrentLocation;
}
}
// For non-modular AST files, restore visibility of modules.
for (auto &Import : ImportedModules) {
if (Import.ImportLoc.isInvalid())
continue;
if (Module *Imported = getSubmodule(Import.ID)) {
SemaObj->makeModuleVisible(Imported, Import.ImportLoc);
}
}
}
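// Look up an identifier by name in the loaded AST files, deserializing it on
// demand. Illustrative (hypothetical) use:
//   if (IdentifierInfo *II = Reader.get("printf"))
//     ; // II has been loaded and marked up to date.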
IdentifierInfo *ASTReader::get(StringRef Name) {
// Note that we are loading an identifier.
Deserializing AnIdentifier(this);
IdentifierLookupVisitor Visitor(Name, /*PriorGeneration=*/0,
NumIdentifierLookups,
NumIdentifierLookupHits);
// We don't need to do identifier table lookups in C++ modules (we preload
// all interesting declarations, and don't need to use the scope for name
// lookups). Perform the lookup in PCH files, though, since we don't build
// a complete initial identifier table if we're carrying on from a PCH.
if (PP.getLangOpts().CPlusPlus) {
for (auto F : ModuleMgr.pch_modules())
if (Visitor(*F))
break;
} else {
// If there is a global index, look there first to determine which modules
// provably do not have any results for this identifier.
GlobalModuleIndex::HitSet Hits;
GlobalModuleIndex::HitSet *HitsPtr = nullptr;
if (!loadGlobalIndex()) {
if (GlobalIndex->lookupIdentifier(Name, Hits)) {
HitsPtr = &Hits;
}
}
ModuleMgr.visit(Visitor, HitsPtr);
}
IdentifierInfo *II = Visitor.getIdentifierInfo();
markIdentifierUpToDate(II);
return II;
}
namespace clang {
/// An identifier-lookup iterator that enumerates all of the
/// identifiers stored within a set of AST files.
class ASTIdentifierIterator : public IdentifierIterator {
/// The AST reader whose identifiers are being enumerated.
const ASTReader &Reader;
/// The current index into the chain of AST files stored in
/// the AST reader.
unsigned Index;
/// The current position within the identifier lookup table
/// of the current AST file.
ASTIdentifierLookupTable::key_iterator Current;
/// The end position within the identifier lookup table of
/// the current AST file.
ASTIdentifierLookupTable::key_iterator End;
/// Whether to skip any modules in the ASTReader.
bool SkipModules;
public:
explicit ASTIdentifierIterator(const ASTReader &Reader,
bool SkipModules = false);
StringRef Next() override;
};
} // namespace clang
ASTIdentifierIterator::ASTIdentifierIterator(const ASTReader &Reader,
bool SkipModules)
: Reader(Reader), Index(Reader.ModuleMgr.size()), SkipModules(SkipModules) {
}
StringRef ASTIdentifierIterator::Next() {
while (Current == End) {
// If we have exhausted all of our AST files, we're done.
if (Index == 0)
return StringRef();
--Index;
ModuleFile &F = Reader.ModuleMgr[Index];
if (SkipModules && F.isModule())
continue;
ASTIdentifierLookupTable *IdTable =
(ASTIdentifierLookupTable *)F.IdentifierLookupTable;
Current = IdTable->key_begin();
End = IdTable->key_end();
}
// We still have identifiers remaining in the current AST file; return
// the next one.
StringRef Result = *Current;
++Current;
return Result;
}
namespace {
/// A utility for appending two IdentifierIterators.
class ChainedIdentifierIterator : public IdentifierIterator {
std::unique_ptr<IdentifierIterator> Current;
std::unique_ptr<IdentifierIterator> Queued;
public:
ChainedIdentifierIterator(std::unique_ptr<IdentifierIterator> First,
std::unique_ptr<IdentifierIterator> Second)
: Current(std::move(First)), Queued(std::move(Second)) {}
StringRef Next() override {
if (!Current)
return StringRef();
StringRef result = Current->Next();
if (!result.empty())
return result;
// Try the queued iterator, which may itself be empty.
Current.reset();
std::swap(Current, Queued);
return Next();
}
};
} // namespace
IdentifierIterator *ASTReader::getIdentifiers() {
if (!loadGlobalIndex()) {
std::unique_ptr<IdentifierIterator> ReaderIter(
new ASTIdentifierIterator(*this, /*SkipModules=*/true));
std::unique_ptr<IdentifierIterator> ModulesIter(
GlobalIndex->createIdentifierIterator());
return new ChainedIdentifierIterator(std::move(ReaderIter),
std::move(ModulesIter));
}
return new ASTIdentifierIterator(*this);
}
namespace clang {
namespace serialization {
class ReadMethodPoolVisitor {
ASTReader &Reader;
Selector Sel;
unsigned PriorGeneration;
unsigned InstanceBits = 0;
unsigned FactoryBits = 0;
bool InstanceHasMoreThanOneDecl = false;
bool FactoryHasMoreThanOneDecl = false;
SmallVector<ObjCMethodDecl *, 4> InstanceMethods;
SmallVector<ObjCMethodDecl *, 4> FactoryMethods;
public:
ReadMethodPoolVisitor(ASTReader &Reader, Selector Sel,
unsigned PriorGeneration)
: Reader(Reader), Sel(Sel), PriorGeneration(PriorGeneration) {}
bool operator()(ModuleFile &M) {
if (!M.SelectorLookupTable)
return false;
// If we've already searched this module file, skip it now.
if (M.Generation <= PriorGeneration)
return true;
++Reader.NumMethodPoolTableLookups;
ASTSelectorLookupTable *PoolTable
= (ASTSelectorLookupTable*)M.SelectorLookupTable;
ASTSelectorLookupTable::iterator Pos = PoolTable->find(Sel);
if (Pos == PoolTable->end())
return false;
++Reader.NumMethodPoolTableHits;
++Reader.NumSelectorsRead;
// FIXME: Not quite happy with the statistics here. We probably should
// disable this tracking when called via LoadSelector.
// Also, should entries without methods count as misses?
++Reader.NumMethodPoolEntriesRead;
ASTSelectorLookupTrait::data_type Data = *Pos;
if (Reader.DeserializationListener)
Reader.DeserializationListener->SelectorRead(Data.ID, Sel);
InstanceMethods.append(Data.Instance.begin(), Data.Instance.end());
FactoryMethods.append(Data.Factory.begin(), Data.Factory.end());
InstanceBits = Data.InstanceBits;
FactoryBits = Data.FactoryBits;
InstanceHasMoreThanOneDecl = Data.InstanceHasMoreThanOneDecl;
FactoryHasMoreThanOneDecl = Data.FactoryHasMoreThanOneDecl;
return true;
}
/// Retrieve the instance methods found by this visitor.
ArrayRef<ObjCMethodDecl *> getInstanceMethods() const {
return InstanceMethods;
}
/// Retrieve the factory methods found by this visitor.
ArrayRef<ObjCMethodDecl *> getFactoryMethods() const {
return FactoryMethods;
}
unsigned getInstanceBits() const { return InstanceBits; }
unsigned getFactoryBits() const { return FactoryBits; }
bool instanceHasMoreThanOneDecl() const {
return InstanceHasMoreThanOneDecl;
}
bool factoryHasMoreThanOneDecl() const { return FactoryHasMoreThanOneDecl; }
};
} // namespace serialization
} // namespace clang
/// Add the given set of methods to the method list.
static void addMethodsToPool(Sema &S, ArrayRef<ObjCMethodDecl *> Methods,
ObjCMethodList &List) {
for (unsigned I = 0, N = Methods.size(); I != N; ++I) {
S.addMethodToGlobalList(&List, Methods[I]);
}
}
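// Method-pool lookups are generation based: we remember the reader generation
// at which a selector was last searched and only revisit module files loaded
// after that point (see the Generation check in ReadMethodPoolVisitor).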
void ASTReader::ReadMethodPool(Selector Sel) {
// Get the selector generation and update it to the current generation.
unsigned &Generation = SelectorGeneration[Sel];
unsigned PriorGeneration = Generation;
Generation = getGeneration();
SelectorOutOfDate[Sel] = false;
// Search for methods defined with this selector.
++NumMethodPoolLookups;
ReadMethodPoolVisitor Visitor(*this, Sel, PriorGeneration);
ModuleMgr.visit(Visitor);
if (Visitor.getInstanceMethods().empty() &&
Visitor.getFactoryMethods().empty())
return;
++NumMethodPoolHits;
if (!getSema())
return;
Sema &S = *getSema();
Sema::GlobalMethodPool::iterator Pos
= S.MethodPool.insert(std::make_pair(Sel, Sema::GlobalMethods())).first;
Pos->second.first.setBits(Visitor.getInstanceBits());
Pos->second.first.setHasMoreThanOneDecl(Visitor.instanceHasMoreThanOneDecl());
Pos->second.second.setBits(Visitor.getFactoryBits());
Pos->second.second.setHasMoreThanOneDecl(Visitor.factoryHasMoreThanOneDecl());
// Add methods to the global pool *after* setting hasMoreThanOneDecl, since
// when building a module we keep every method individually and may need to
// update hasMoreThanOneDecl as we add the methods.
addMethodsToPool(S, Visitor.getInstanceMethods(), Pos->second.first);
addMethodsToPool(S, Visitor.getFactoryMethods(), Pos->second.second);
}
void ASTReader::updateOutOfDateSelector(Selector Sel) {
if (SelectorOutOfDate[Sel])
ReadMethodPool(Sel);
}
void ASTReader::ReadKnownNamespaces(
SmallVectorImpl<NamespaceDecl *> &Namespaces) {
Namespaces.clear();
for (unsigned I = 0, N = KnownNamespaces.size(); I != N; ++I) {
if (NamespaceDecl *Namespace
= dyn_cast_or_null<NamespaceDecl>(GetDecl(KnownNamespaces[I])))
Namespaces.push_back(Namespace);
}
}
void ASTReader::ReadUndefinedButUsed(
llvm::MapVector<NamedDecl *, SourceLocation> &Undefined) {
for (unsigned Idx = 0, N = UndefinedButUsed.size(); Idx != N;) {
NamedDecl *D = cast<NamedDecl>(GetDecl(UndefinedButUsed[Idx++]));
SourceLocation Loc =
SourceLocation::getFromRawEncoding(UndefinedButUsed[Idx++]);
Undefined.insert(std::make_pair(D, Loc));
}
}
void ASTReader::ReadMismatchingDeleteExpressions(llvm::MapVector<
FieldDecl *, llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &
Exprs) {
for (unsigned Idx = 0, N = DelayedDeleteExprs.size(); Idx != N;) {
FieldDecl *FD = cast<FieldDecl>(GetDecl(DelayedDeleteExprs[Idx++]));
uint64_t Count = DelayedDeleteExprs[Idx++];
for (uint64_t C = 0; C < Count; ++C) {
SourceLocation DeleteLoc =
SourceLocation::getFromRawEncoding(DelayedDeleteExprs[Idx++]);
const bool IsArrayForm = DelayedDeleteExprs[Idx++];
Exprs[FD].push_back(std::make_pair(DeleteLoc, IsArrayForm));
}
}
}
void ASTReader::ReadTentativeDefinitions(
SmallVectorImpl<VarDecl *> &TentativeDefs) {
for (unsigned I = 0, N = TentativeDefinitions.size(); I != N; ++I) {
VarDecl *Var = dyn_cast_or_null<VarDecl>(GetDecl(TentativeDefinitions[I]));
if (Var)
TentativeDefs.push_back(Var);
}
TentativeDefinitions.clear();
}
void ASTReader::ReadUnusedFileScopedDecls(
SmallVectorImpl<const DeclaratorDecl *> &Decls) {
for (unsigned I = 0, N = UnusedFileScopedDecls.size(); I != N; ++I) {
DeclaratorDecl *D
= dyn_cast_or_null<DeclaratorDecl>(GetDecl(UnusedFileScopedDecls[I]));
if (D)
Decls.push_back(D);
}
UnusedFileScopedDecls.clear();
}
void ASTReader::ReadDelegatingConstructors(
SmallVectorImpl<CXXConstructorDecl *> &Decls) {
for (unsigned I = 0, N = DelegatingCtorDecls.size(); I != N; ++I) {
CXXConstructorDecl *D
= dyn_cast_or_null<CXXConstructorDecl>(GetDecl(DelegatingCtorDecls[I]));
if (D)
Decls.push_back(D);
}
DelegatingCtorDecls.clear();
}
void ASTReader::ReadExtVectorDecls(SmallVectorImpl<TypedefNameDecl *> &Decls) {
for (unsigned I = 0, N = ExtVectorDecls.size(); I != N; ++I) {
TypedefNameDecl *D
= dyn_cast_or_null<TypedefNameDecl>(GetDecl(ExtVectorDecls[I]));
if (D)
Decls.push_back(D);
}
ExtVectorDecls.clear();
}
void ASTReader::ReadUnusedLocalTypedefNameCandidates(
llvm::SmallSetVector<const TypedefNameDecl *, 4> &Decls) {
for (unsigned I = 0, N = UnusedLocalTypedefNameCandidates.size(); I != N;
++I) {
TypedefNameDecl *D = dyn_cast_or_null<TypedefNameDecl>(
GetDecl(UnusedLocalTypedefNameCandidates[I]));
if (D)
Decls.insert(D);
}
UnusedLocalTypedefNameCandidates.clear();
}
void ASTReader::ReadDeclsToCheckForDeferredDiags(
llvm::SmallSetVector<Decl *, 4> &Decls) {
for (auto I : DeclsToCheckForDeferredDiags) {
auto *D = dyn_cast_or_null<Decl>(GetDecl(I));
if (D)
Decls.insert(D);
}
DeclsToCheckForDeferredDiags.clear();
}
void ASTReader::ReadReferencedSelectors(
SmallVectorImpl<std::pair<Selector, SourceLocation>> &Sels) {
if (ReferencedSelectorsData.empty())
return;
// If there are @selector references, add them to the pool of referenced
// selectors. This is for the implementation of -Wselector.
unsigned int DataSize = ReferencedSelectorsData.size()-1;
unsigned I = 0;
while (I < DataSize) {
Selector Sel = DecodeSelector(ReferencedSelectorsData[I++]);
SourceLocation SelLoc
= SourceLocation::getFromRawEncoding(ReferencedSelectorsData[I++]);
Sels.push_back(std::make_pair(Sel, SelLoc));
}
ReferencedSelectorsData.clear();
}
void ASTReader::ReadWeakUndeclaredIdentifiers(
SmallVectorImpl<std::pair<IdentifierInfo *, WeakInfo>> &WeakIDs) {
if (WeakUndeclaredIdentifiers.empty())
return;
for (unsigned I = 0, N = WeakUndeclaredIdentifiers.size(); I < N; /*none*/) {
IdentifierInfo *WeakId
= DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]);
IdentifierInfo *AliasId
= DecodeIdentifierInfo(WeakUndeclaredIdentifiers[I++]);
SourceLocation Loc
= SourceLocation::getFromRawEncoding(WeakUndeclaredIdentifiers[I++]);
bool Used = WeakUndeclaredIdentifiers[I++];
WeakInfo WI(AliasId, Loc);
WI.setUsed(Used);
WeakIDs.push_back(std::make_pair(WeakId, WI));
}
WeakUndeclaredIdentifiers.clear();
}
void ASTReader::ReadUsedVTables(SmallVectorImpl<ExternalVTableUse> &VTables) {
for (unsigned Idx = 0, N = VTableUses.size(); Idx < N; /* In loop */) {
ExternalVTableUse VT;
VT.Record = dyn_cast_or_null<CXXRecordDecl>(GetDecl(VTableUses[Idx++]));
VT.Location = SourceLocation::getFromRawEncoding(VTableUses[Idx++]);
VT.DefinitionRequired = VTableUses[Idx++];
VTables.push_back(VT);
}
VTableUses.clear();
}
void ASTReader::ReadPendingInstantiations(
SmallVectorImpl<std::pair<ValueDecl *, SourceLocation>> &Pending) {
for (unsigned Idx = 0, N = PendingInstantiations.size(); Idx < N;) {
ValueDecl *D = cast<ValueDecl>(GetDecl(PendingInstantiations[Idx++]));
SourceLocation Loc
= SourceLocation::getFromRawEncoding(PendingInstantiations[Idx++]);
Pending.push_back(std::make_pair(D, Loc));
}
PendingInstantiations.clear();
}
void ASTReader::ReadLateParsedTemplates(
llvm::MapVector<const FunctionDecl *, std::unique_ptr<LateParsedTemplate>>
&LPTMap) {
for (auto &LPT : LateParsedTemplates) {
ModuleFile *FMod = LPT.first;
RecordDataImpl &LateParsed = LPT.second;
for (unsigned Idx = 0, N = LateParsed.size(); Idx < N;
/* In loop */) {
FunctionDecl *FD =
cast<FunctionDecl>(GetLocalDecl(*FMod, LateParsed[Idx++]));
auto LT = std::make_unique<LateParsedTemplate>();
LT->D = GetLocalDecl(*FMod, LateParsed[Idx++]);
ModuleFile *F = getOwningModuleFile(LT->D);
assert(F && "No module");
unsigned TokN = LateParsed[Idx++];
LT->Toks.reserve(TokN);
for (unsigned T = 0; T < TokN; ++T)
LT->Toks.push_back(ReadToken(*F, LateParsed, Idx));
LPTMap.insert(std::make_pair(FD, std::move(LT)));
}
}
+
+ LateParsedTemplates.clear();
}
void ASTReader::LoadSelector(Selector Sel) {
// It would be complicated to avoid reading the methods anyway. So don't.
ReadMethodPool(Sel);
}
void ASTReader::SetIdentifierInfo(IdentifierID ID, IdentifierInfo *II) {
assert(ID && "Non-zero identifier ID required");
assert(ID <= IdentifiersLoaded.size() && "identifier ID out of range");
IdentifiersLoaded[ID - 1] = II;
if (DeserializationListener)
DeserializationListener->IdentifierRead(ID, II);
}
/// Set the globally-visible declarations associated with the given
/// identifier.
///
/// If the AST reader is currently in a state where the given declaration IDs
/// cannot safely be resolved, they are queued until it is safe to resolve
/// them.
///
/// \param II an IdentifierInfo that refers to one or more globally-visible
/// declarations.
///
/// \param DeclIDs the set of declaration IDs with the name @p II that are
/// visible at global scope.
///
/// \param Decls if non-null, this vector will be populated with the set of
/// deserialized declarations. These declarations will not be pushed into
/// scope.
void
ASTReader::SetGloballyVisibleDecls(IdentifierInfo *II,
const SmallVectorImpl<uint32_t> &DeclIDs,
SmallVectorImpl<Decl *> *Decls) {
if (NumCurrentElementsDeserializing && !Decls) {
PendingIdentifierInfos[II].append(DeclIDs.begin(), DeclIDs.end());
return;
}
for (unsigned I = 0, N = DeclIDs.size(); I != N; ++I) {
if (!SemaObj) {
// Queue this declaration so that it will be added to the
// translation unit scope and identifier's declaration chain
// once a Sema object is known.
PreloadedDeclIDs.push_back(DeclIDs[I]);
continue;
}
NamedDecl *D = cast<NamedDecl>(GetDecl(DeclIDs[I]));
// If we're simply supposed to record the declarations, do so now.
if (Decls) {
Decls->push_back(D);
continue;
}
// Introduce this declaration into the translation-unit scope
// and add it to the declaration chain for this identifier, so
// that (unqualified) name lookup will find it.
pushExternalDeclIntoScope(D, II);
}
}
IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) {
if (ID == 0)
return nullptr;
if (IdentifiersLoaded.empty()) {
Error("no identifier table in AST file");
return nullptr;
}
ID -= 1;
if (!IdentifiersLoaded[ID]) {
GlobalIdentifierMapType::iterator I = GlobalIdentifierMap.find(ID + 1);
assert(I != GlobalIdentifierMap.end() && "Corrupted global identifier map");
ModuleFile *M = I->second;
unsigned Index = ID - M->BaseIdentifierID;
const unsigned char *Data =
M->IdentifierTableData + M->IdentifierOffsets[Index];
ASTIdentifierLookupTrait Trait(*this, *M);
auto KeyDataLen = Trait.ReadKeyDataLength(Data);
auto Key = Trait.ReadKey(Data, KeyDataLen.first);
auto &II = PP.getIdentifierTable().get(Key);
IdentifiersLoaded[ID] = &II;
markIdentifierFromAST(*this, II);
if (DeserializationListener)
DeserializationListener->IdentifierRead(ID + 1, &II);
}
return IdentifiersLoaded[ID];
}
IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) {
return DecodeIdentifierInfo(getGlobalIdentifierID(M, LocalID));
}
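// Local-to-global ID translation: IDs below the predefined range are the same
// in every module file; all others are remapped by adding the offset recorded
// for this module in its (lazily loaded) module offset map. The macro,
// submodule and selector variants below follow the same pattern.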
IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_IDENT_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS);
assert(I != M.IdentifierRemap.end()
&& "Invalid index into identifier index remap");
return LocalID + I->second;
}
MacroInfo *ASTReader::getMacro(MacroID ID) {
if (ID == 0)
return nullptr;
if (MacrosLoaded.empty()) {
Error("no macro table in AST file");
return nullptr;
}
ID -= NUM_PREDEF_MACRO_IDS;
if (!MacrosLoaded[ID]) {
GlobalMacroMapType::iterator I
= GlobalMacroMap.find(ID + NUM_PREDEF_MACRO_IDS);
assert(I != GlobalMacroMap.end() && "Corrupted global macro map");
ModuleFile *M = I->second;
unsigned Index = ID - M->BaseMacroID;
MacrosLoaded[ID] =
ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]);
if (DeserializationListener)
DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS,
MacrosLoaded[ID]);
}
return MacrosLoaded[ID];
}
MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_MACRO_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.MacroRemap.find(LocalID - NUM_PREDEF_MACRO_IDS);
assert(I != M.MacroRemap.end() && "Invalid index into macro index remap");
return LocalID + I->second;
}
serialization::SubmoduleID
ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) {
if (LocalID < NUM_PREDEF_SUBMODULE_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.SubmoduleRemap.find(LocalID - NUM_PREDEF_SUBMODULE_IDS);
assert(I != M.SubmoduleRemap.end()
&& "Invalid index into submodule index remap");
return LocalID + I->second;
}
Module *ASTReader::getSubmodule(SubmoduleID GlobalID) {
if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
assert(GlobalID == 0 && "Unhandled global submodule ID");
return nullptr;
}
if (GlobalID > SubmodulesLoaded.size()) {
Error("submodule ID out of range in AST file");
return nullptr;
}
return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS];
}
Module *ASTReader::getModule(unsigned ID) {
return getSubmodule(ID);
}
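// Module file references use a 1-bit tag (mirrored by getModuleFileID below):
// odd values carry a submodule ID in the upper bits, while even values carry
// an index counted from the end of the PCH chain.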
ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &F, unsigned ID) {
if (ID & 1) {
// It's a module, look it up by submodule ID.
auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(F, ID >> 1));
return I == GlobalSubmoduleMap.end() ? nullptr : I->second;
} else {
// It's a prefix (preamble, PCH, ...). Look it up by index.
unsigned IndexFromEnd = ID >> 1;
assert(IndexFromEnd && "got reference to unknown module file");
return getModuleManager().pch_modules().end()[-IndexFromEnd];
}
}
unsigned ASTReader::getModuleFileID(ModuleFile *F) {
if (!F)
return 1;
// For a file representing a module, use the submodule ID of the top-level
// module as the file ID. For any other kind of file, the number of such
// files loaded beforehand will be the same on reload.
// FIXME: Is this true even if we have an explicit module file and a PCH?
if (F->isModule())
return ((F->BaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS) << 1) | 1;
auto PCHModules = getModuleManager().pch_modules();
auto I = llvm::find(PCHModules, F);
assert(I != PCHModules.end() && "emitting reference to unknown file");
return (I - PCHModules.end()) << 1;
}
llvm::Optional<ASTSourceDescriptor>
ASTReader::getSourceDescriptor(unsigned ID) {
if (Module *M = getSubmodule(ID))
return ASTSourceDescriptor(*M);
// If there is only a single PCH, return it instead.
// Chained PCHs are not supported.
const auto &PCHChain = ModuleMgr.pch_modules();
if (std::distance(std::begin(PCHChain), std::end(PCHChain))) {
ModuleFile &MF = ModuleMgr.getPrimaryModule();
StringRef ModuleName = llvm::sys::path::filename(MF.OriginalSourceFileName);
StringRef FileName = llvm::sys::path::filename(MF.FileName);
return ASTSourceDescriptor(ModuleName, MF.OriginalDir, FileName,
MF.Signature);
}
return None;
}
ExternalASTSource::ExtKind ASTReader::hasExternalDefinitions(const Decl *FD) {
auto I = DefinitionSource.find(FD);
if (I == DefinitionSource.end())
return EK_ReplyHazy;
return I->second ? EK_Never : EK_Always;
}
Selector ASTReader::getLocalSelector(ModuleFile &M, unsigned LocalID) {
return DecodeSelector(getGlobalSelectorID(M, LocalID));
}
Selector ASTReader::DecodeSelector(serialization::SelectorID ID) {
if (ID == 0)
return Selector();
if (ID > SelectorsLoaded.size()) {
Error("selector ID out of range in AST file");
return Selector();
}
if (SelectorsLoaded[ID - 1].getAsOpaquePtr() == nullptr) {
// Load this selector from the selector table.
GlobalSelectorMapType::iterator I = GlobalSelectorMap.find(ID);
assert(I != GlobalSelectorMap.end() && "Corrupted global selector map");
ModuleFile &M = *I->second;
ASTSelectorLookupTrait Trait(*this, M);
unsigned Idx = ID - M.BaseSelectorID - NUM_PREDEF_SELECTOR_IDS;
SelectorsLoaded[ID - 1] =
Trait.ReadKey(M.SelectorLookupTableData + M.SelectorOffsets[Idx], 0);
if (DeserializationListener)
DeserializationListener->SelectorRead(ID, SelectorsLoaded[ID - 1]);
}
return SelectorsLoaded[ID - 1];
}
Selector ASTReader::GetExternalSelector(serialization::SelectorID ID) {
return DecodeSelector(ID);
}
uint32_t ASTReader::GetNumExternalSelectors() {
// ID 0 (the null selector) is considered an external selector.
return getTotalNumSelectors() + 1;
}
serialization::SelectorID
ASTReader::getGlobalSelectorID(ModuleFile &M, unsigned LocalID) const {
if (LocalID < NUM_PREDEF_SELECTOR_IDS)
return LocalID;
if (!M.ModuleOffsetMap.empty())
ReadModuleOffsetMap(M);
ContinuousRangeMap<uint32_t, int, 2>::iterator I
= M.SelectorRemap.find(LocalID - NUM_PREDEF_SELECTOR_IDS);
assert(I != M.SelectorRemap.end()
&& "Invalid index into selector index remap");
return LocalID + I->second;
}
DeclarationNameLoc
ASTRecordReader::readDeclarationNameLoc(DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
return DeclarationNameLoc::makeNamedTypeLoc(readTypeSourceInfo());
case DeclarationName::CXXOperatorName:
return DeclarationNameLoc::makeCXXOperatorNameLoc(readSourceRange());
case DeclarationName::CXXLiteralOperatorName:
return DeclarationNameLoc::makeCXXLiteralOperatorNameLoc(
readSourceLocation());
case DeclarationName::Identifier:
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
case DeclarationName::CXXUsingDirective:
case DeclarationName::CXXDeductionGuideName:
break;
}
return DeclarationNameLoc();
}
DeclarationNameInfo ASTRecordReader::readDeclarationNameInfo() {
DeclarationNameInfo NameInfo;
NameInfo.setName(readDeclarationName());
NameInfo.setLoc(readSourceLocation());
NameInfo.setInfo(readDeclarationNameLoc(NameInfo.getName()));
return NameInfo;
}
void ASTRecordReader::readQualifierInfo(QualifierInfo &Info) {
Info.QualifierLoc = readNestedNameSpecifierLoc();
unsigned NumTPLists = readInt();
Info.NumTemplParamLists = NumTPLists;
if (NumTPLists) {
Info.TemplParamLists =
new (getContext()) TemplateParameterList *[NumTPLists];
for (unsigned i = 0; i != NumTPLists; ++i)
Info.TemplParamLists[i] = readTemplateParameterList();
}
}
TemplateParameterList *
ASTRecordReader::readTemplateParameterList() {
SourceLocation TemplateLoc = readSourceLocation();
SourceLocation LAngleLoc = readSourceLocation();
SourceLocation RAngleLoc = readSourceLocation();
unsigned NumParams = readInt();
SmallVector<NamedDecl *, 16> Params;
Params.reserve(NumParams);
while (NumParams--)
Params.push_back(readDeclAs<NamedDecl>());
bool HasRequiresClause = readBool();
Expr *RequiresClause = HasRequiresClause ? readExpr() : nullptr;
TemplateParameterList *TemplateParams = TemplateParameterList::Create(
getContext(), TemplateLoc, LAngleLoc, Params, RAngleLoc, RequiresClause);
return TemplateParams;
}
void ASTRecordReader::readTemplateArgumentList(
SmallVectorImpl<TemplateArgument> &TemplArgs,
bool Canonicalize) {
unsigned NumTemplateArgs = readInt();
TemplArgs.reserve(NumTemplateArgs);
while (NumTemplateArgs--)
TemplArgs.push_back(readTemplateArgument(Canonicalize));
}
/// Read a UnresolvedSet structure.
void ASTRecordReader::readUnresolvedSet(LazyASTUnresolvedSet &Set) {
unsigned NumDecls = readInt();
Set.reserve(getContext(), NumDecls);
while (NumDecls--) {
DeclID ID = readDeclID();
AccessSpecifier AS = (AccessSpecifier) readInt();
Set.addLazyDecl(getContext(), ID, AS);
}
}
CXXBaseSpecifier
ASTRecordReader::readCXXBaseSpecifier() {
bool isVirtual = readBool();
bool isBaseOfClass = readBool();
AccessSpecifier AS = static_cast<AccessSpecifier>(readInt());
bool inheritConstructors = readBool();
TypeSourceInfo *TInfo = readTypeSourceInfo();
SourceRange Range = readSourceRange();
SourceLocation EllipsisLoc = readSourceLocation();
CXXBaseSpecifier Result(Range, isVirtual, isBaseOfClass, AS, TInfo,
EllipsisLoc);
Result.setInheritConstructors(inheritConstructors);
return Result;
}
CXXCtorInitializer **
ASTRecordReader::readCXXCtorInitializers() {
ASTContext &Context = getContext();
unsigned NumInitializers = readInt();
assert(NumInitializers && "wrote ctor initializers but have no inits");
auto **CtorInitializers = new (Context) CXXCtorInitializer*[NumInitializers];
for (unsigned i = 0; i != NumInitializers; ++i) {
TypeSourceInfo *TInfo = nullptr;
bool IsBaseVirtual = false;
FieldDecl *Member = nullptr;
IndirectFieldDecl *IndirectMember = nullptr;
CtorInitializerType Type = (CtorInitializerType) readInt();
switch (Type) {
case CTOR_INITIALIZER_BASE:
TInfo = readTypeSourceInfo();
IsBaseVirtual = readBool();
break;
case CTOR_INITIALIZER_DELEGATING:
TInfo = readTypeSourceInfo();
break;
case CTOR_INITIALIZER_MEMBER:
Member = readDeclAs<FieldDecl>();
break;
case CTOR_INITIALIZER_INDIRECT_MEMBER:
IndirectMember = readDeclAs<IndirectFieldDecl>();
break;
}
SourceLocation MemberOrEllipsisLoc = readSourceLocation();
Expr *Init = readExpr();
SourceLocation LParenLoc = readSourceLocation();
SourceLocation RParenLoc = readSourceLocation();
CXXCtorInitializer *BOMInit;
if (Type == CTOR_INITIALIZER_BASE)
BOMInit = new (Context)
CXXCtorInitializer(Context, TInfo, IsBaseVirtual, LParenLoc, Init,
RParenLoc, MemberOrEllipsisLoc);
else if (Type == CTOR_INITIALIZER_DELEGATING)
BOMInit = new (Context)
CXXCtorInitializer(Context, TInfo, LParenLoc, Init, RParenLoc);
else if (Member)
BOMInit = new (Context)
CXXCtorInitializer(Context, Member, MemberOrEllipsisLoc, LParenLoc,
Init, RParenLoc);
else
BOMInit = new (Context)
CXXCtorInitializer(Context, IndirectMember, MemberOrEllipsisLoc,
LParenLoc, Init, RParenLoc);
if (/*IsWritten*/readBool()) {
unsigned SourceOrder = readInt();
BOMInit->setSourceOrder(SourceOrder);
}
CtorInitializers[i] = BOMInit;
}
return CtorInitializers;
}
NestedNameSpecifierLoc
ASTRecordReader::readNestedNameSpecifierLoc() {
ASTContext &Context = getContext();
unsigned N = readInt();
NestedNameSpecifierLocBuilder Builder;
for (unsigned I = 0; I != N; ++I) {
auto Kind = readNestedNameSpecifierKind();
switch (Kind) {
case NestedNameSpecifier::Identifier: {
IdentifierInfo *II = readIdentifier();
SourceRange Range = readSourceRange();
Builder.Extend(Context, II, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::Namespace: {
NamespaceDecl *NS = readDeclAs<NamespaceDecl>();
SourceRange Range = readSourceRange();
Builder.Extend(Context, NS, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::NamespaceAlias: {
NamespaceAliasDecl *Alias = readDeclAs<NamespaceAliasDecl>();
SourceRange Range = readSourceRange();
Builder.Extend(Context, Alias, Range.getBegin(), Range.getEnd());
break;
}
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
bool Template = readBool();
TypeSourceInfo *T = readTypeSourceInfo();
if (!T)
return NestedNameSpecifierLoc();
SourceLocation ColonColonLoc = readSourceLocation();
// FIXME: 'template' keyword location not saved anywhere, so we fake it.
Builder.Extend(Context,
Template? T->getTypeLoc().getBeginLoc() : SourceLocation(),
T->getTypeLoc(), ColonColonLoc);
break;
}
case NestedNameSpecifier::Global: {
SourceLocation ColonColonLoc = readSourceLocation();
Builder.MakeGlobal(Context, ColonColonLoc);
break;
}
case NestedNameSpecifier::Super: {
CXXRecordDecl *RD = readDeclAs<CXXRecordDecl>();
SourceRange Range = readSourceRange();
Builder.MakeSuper(Context, RD, Range.getBegin(), Range.getEnd());
break;
}
}
}
return Builder.getWithLocInContext(Context);
}
SourceRange
ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
SourceLocation end = ReadSourceLocation(F, Record, Idx);
return SourceRange(beg, end);
}
/// Read a floating-point value
llvm::APFloat ASTRecordReader::readAPFloat(const llvm::fltSemantics &Sem) {
return llvm::APFloat(Sem, readAPInt());
}
// Read a string
std::string ASTReader::ReadString(const RecordData &Record, unsigned &Idx) {
unsigned Len = Record[Idx++];
std::string Result(Record.data() + Idx, Record.data() + Idx + Len);
Idx += Len;
return Result;
}
std::string ASTReader::ReadPath(ModuleFile &F, const RecordData &Record,
unsigned &Idx) {
std::string Filename = ReadString(Record, Idx);
ResolveImportedPath(F, Filename);
return Filename;
}
std::string ASTReader::ReadPath(StringRef BaseDirectory,
const RecordData &Record, unsigned &Idx) {
std::string Filename = ReadString(Record, Idx);
if (!BaseDirectory.empty())
ResolveImportedPath(Filename, BaseDirectory);
return Filename;
}
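// Version tuples are serialized with the minor and subminor components biased
// by one so that zero can mean "not present"; e.g. a stored {10, 5, 0}
// decodes to version 10.4.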
VersionTuple ASTReader::ReadVersionTuple(const RecordData &Record,
unsigned &Idx) {
unsigned Major = Record[Idx++];
unsigned Minor = Record[Idx++];
unsigned Subminor = Record[Idx++];
if (Minor == 0)
return VersionTuple(Major);
if (Subminor == 0)
return VersionTuple(Major, Minor - 1);
return VersionTuple(Major, Minor - 1, Subminor - 1);
}
CXXTemporary *ASTReader::ReadCXXTemporary(ModuleFile &F,
const RecordData &Record,
unsigned &Idx) {
CXXDestructorDecl *Decl = ReadDeclAs<CXXDestructorDecl>(F, Record, Idx);
return CXXTemporary::Create(getContext(), Decl);
}
DiagnosticBuilder ASTReader::Diag(unsigned DiagID) const {
return Diag(CurrentImportLoc, DiagID);
}
DiagnosticBuilder ASTReader::Diag(SourceLocation Loc, unsigned DiagID) const {
return Diags.Report(Loc, DiagID);
}
/// Retrieve the identifier table associated with the
/// preprocessor.
IdentifierTable &ASTReader::getIdentifierTable() {
return PP.getIdentifierTable();
}
/// Record that the given ID maps to the given switch-case
/// statement.
void ASTReader::RecordSwitchCaseID(SwitchCase *SC, unsigned ID) {
assert((*CurrSwitchCaseStmts)[ID] == nullptr &&
"Already have a SwitchCase with this ID");
(*CurrSwitchCaseStmts)[ID] = SC;
}
/// Retrieve the switch-case statement with the given ID.
SwitchCase *ASTReader::getSwitchCaseWithID(unsigned ID) {
assert((*CurrSwitchCaseStmts)[ID] != nullptr && "No SwitchCase with this ID");
return (*CurrSwitchCaseStmts)[ID];
}
void ASTReader::ClearSwitchCaseIDs() {
CurrSwitchCaseStmts->clear();
}
void ASTReader::ReadComments() {
ASTContext &Context = getContext();
std::vector<RawComment *> Comments;
for (SmallVectorImpl<std::pair<BitstreamCursor,
serialization::ModuleFile *>>::iterator
I = CommentsCursors.begin(),
E = CommentsCursors.end();
I != E; ++I) {
Comments.clear();
BitstreamCursor &Cursor = I->first;
serialization::ModuleFile &F = *I->second;
SavedStreamPosition SavedPosition(Cursor);
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
Cursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
return;
}
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
Error("malformed block record in AST file");
return;
case llvm::BitstreamEntry::EndBlock:
goto NextCursor;
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
}
// Read a record.
Record.clear();
Expected<unsigned> MaybeComment = Cursor.readRecord(Entry.ID, Record);
if (!MaybeComment) {
Error(MaybeComment.takeError());
return;
}
switch ((CommentRecordTypes)MaybeComment.get()) {
case COMMENTS_RAW_COMMENT: {
unsigned Idx = 0;
SourceRange SR = ReadSourceRange(F, Record, Idx);
RawComment::CommentKind Kind =
(RawComment::CommentKind) Record[Idx++];
bool IsTrailingComment = Record[Idx++];
bool IsAlmostTrailingComment = Record[Idx++];
Comments.push_back(new (Context) RawComment(
SR, Kind, IsTrailingComment, IsAlmostTrailingComment));
break;
}
}
}
NextCursor:
llvm::DenseMap<FileID, std::map<unsigned, RawComment *>>
FileToOffsetToComment;
for (RawComment *C : Comments) {
SourceLocation CommentLoc = C->getBeginLoc();
if (CommentLoc.isValid()) {
std::pair<FileID, unsigned> Loc =
SourceMgr.getDecomposedLoc(CommentLoc);
if (Loc.first.isValid())
Context.Comments.OrderedComments[Loc.first].emplace(Loc.second, C);
}
}
}
}
void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
bool IncludeSystem, bool Complain,
llvm::function_ref<void(const serialization::InputFile &IF,
bool isSystem)> Visitor) {
unsigned NumUserInputs = MF.NumUserInputFiles;
unsigned NumInputs = MF.InputFilesLoaded.size();
assert(NumUserInputs <= NumInputs);
unsigned N = IncludeSystem ? NumInputs : NumUserInputs;
for (unsigned I = 0; I < N; ++I) {
bool IsSystem = I >= NumUserInputs;
InputFile IF = getInputFile(MF, I+1, Complain);
Visitor(IF, IsSystem);
}
}
void ASTReader::visitTopLevelModuleMaps(
serialization::ModuleFile &MF,
llvm::function_ref<void(const FileEntry *FE)> Visitor) {
unsigned NumInputs = MF.InputFilesLoaded.size();
for (unsigned I = 0; I < NumInputs; ++I) {
InputFileInfo IFI = readInputFileInfo(MF, I + 1);
if (IFI.TopLevelModuleMap)
// FIXME: This unnecessarily re-reads the InputFileInfo.
if (auto FE = getInputFile(MF, I + 1).getFile())
Visitor(FE);
}
}
std::string ASTReader::getOwningModuleNameForDiagnostic(const Decl *D) {
// If we know the owning module, use it.
if (Module *M = D->getImportedOwningModule())
return M->getFullModuleName();
// Otherwise, use the name of the top-level module the decl is within.
if (ModuleFile *M = getOwningModuleFile(D))
return M->ModuleName;
// Not from a module.
return {};
}
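// Drain the reader's pending-work queues. The outer loop below re-checks all
// of the queues because resolving one kind of pending work (say, a decl
// update) can enqueue more work of another kind.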
void ASTReader::finishPendingActions() {
while (!PendingIdentifierInfos.empty() || !PendingFunctionTypes.empty() ||
!PendingIncompleteDeclChains.empty() || !PendingDeclChains.empty() ||
!PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() ||
!PendingUpdateRecords.empty()) {
// If any identifiers with corresponding top-level declarations have
// been loaded, load those declarations now.
using TopLevelDeclsMap =
llvm::DenseMap<IdentifierInfo *, SmallVector<Decl *, 2>>;
TopLevelDeclsMap TopLevelDecls;
while (!PendingIdentifierInfos.empty()) {
IdentifierInfo *II = PendingIdentifierInfos.back().first;
SmallVector<uint32_t, 4> DeclIDs =
std::move(PendingIdentifierInfos.back().second);
PendingIdentifierInfos.pop_back();
SetGloballyVisibleDecls(II, DeclIDs, &TopLevelDecls[II]);
}
// Load each function type that we deferred loading because it was a
// deduced type that might refer to a local type declared within itself.
for (unsigned I = 0; I != PendingFunctionTypes.size(); ++I) {
auto *FD = PendingFunctionTypes[I].first;
FD->setType(GetType(PendingFunctionTypes[I].second));
// If we gave a function a deduced return type, remember that we need to
// propagate that along the redeclaration chain.
auto *DT = FD->getReturnType()->getContainedDeducedType();
if (DT && DT->isDeduced())
PendingDeducedTypeUpdates.insert(
{FD->getCanonicalDecl(), FD->getReturnType()});
}
PendingFunctionTypes.clear();
// For each decl chain that we wanted to complete while deserializing, mark
// it as "still needs to be completed".
for (unsigned I = 0; I != PendingIncompleteDeclChains.size(); ++I) {
markIncompleteDeclChain(PendingIncompleteDeclChains[I]);
}
PendingIncompleteDeclChains.clear();
// Load pending declaration chains.
for (unsigned I = 0; I != PendingDeclChains.size(); ++I)
loadPendingDeclChain(PendingDeclChains[I].first,
PendingDeclChains[I].second);
PendingDeclChains.clear();
// Make the most recent of the top-level declarations visible.
for (TopLevelDeclsMap::iterator TLD = TopLevelDecls.begin(),
TLDEnd = TopLevelDecls.end(); TLD != TLDEnd; ++TLD) {
IdentifierInfo *II = TLD->first;
for (unsigned I = 0, N = TLD->second.size(); I != N; ++I) {
pushExternalDeclIntoScope(cast<NamedDecl>(TLD->second[I]), II);
}
}
// Load any pending macro definitions.
for (unsigned I = 0; I != PendingMacroIDs.size(); ++I) {
IdentifierInfo *II = PendingMacroIDs.begin()[I].first;
SmallVector<PendingMacroInfo, 2> GlobalIDs;
GlobalIDs.swap(PendingMacroIDs.begin()[I].second);
// Initialize the macro history from chained-PCHs ahead of module imports.
for (unsigned IDIdx = 0, NumIDs = GlobalIDs.size(); IDIdx != NumIDs;
++IDIdx) {
const PendingMacroInfo &Info = GlobalIDs[IDIdx];
if (!Info.M->isModule())
resolvePendingMacro(II, Info);
}
// Handle module imports.
for (unsigned IDIdx = 0, NumIDs = GlobalIDs.size(); IDIdx != NumIDs;
++IDIdx) {
const PendingMacroInfo &Info = GlobalIDs[IDIdx];
if (Info.M->isModule())
resolvePendingMacro(II, Info);
}
}
PendingMacroIDs.clear();
// Wire up the DeclContexts for Decls that we delayed setting until
// recursive loading is completed.
while (!PendingDeclContextInfos.empty()) {
PendingDeclContextInfo Info = PendingDeclContextInfos.front();
PendingDeclContextInfos.pop_front();
DeclContext *SemaDC = cast<DeclContext>(GetDecl(Info.SemaDC));
DeclContext *LexicalDC = cast<DeclContext>(GetDecl(Info.LexicalDC));
Info.D->setDeclContextsImpl(SemaDC, LexicalDC, getContext());
}
// Perform any pending declaration updates.
while (!PendingUpdateRecords.empty()) {
auto Update = PendingUpdateRecords.pop_back_val();
ReadingKindTracker ReadingKind(Read_Decl, *this);
loadDeclUpdateRecords(Update);
}
}
// At this point, all update records for loaded decls are in place, so any
// fake class definitions should have become real.
assert(PendingFakeDefinitionData.empty() &&
"faked up a class definition but never saw the real one");
// If we deserialized any C++ or Objective-C class definitions, any
// Objective-C protocol definitions, or any redeclarable templates, make sure
// that all redeclarations point to the definitions. Note that this can only
// happen now, after the redeclaration chains have been fully wired.
for (Decl *D : PendingDefinitions) {
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
if (const TagType *TagT = dyn_cast<TagType>(TD->getTypeForDecl())) {
// Make sure that the TagType points at the definition.
const_cast<TagType*>(TagT)->decl = TD;
}
if (auto RD = dyn_cast<CXXRecordDecl>(D)) {
for (auto *R = getMostRecentExistingDecl(RD); R;
R = R->getPreviousDecl()) {
assert((R == D) ==
cast<CXXRecordDecl>(R)->isThisDeclarationADefinition() &&
"declaration thinks it's the definition but it isn't");
cast<CXXRecordDecl>(R)->DefinitionData = RD->DefinitionData;
}
}
continue;
}
if (auto ID = dyn_cast<ObjCInterfaceDecl>(D)) {
// Make sure that the ObjCInterfaceType points at the definition.
const_cast<ObjCInterfaceType *>(cast<ObjCInterfaceType>(ID->TypeForDecl))
->Decl = ID;
for (auto *R = getMostRecentExistingDecl(ID); R; R = R->getPreviousDecl())
cast<ObjCInterfaceDecl>(R)->Data = ID->Data;
continue;
}
if (auto PD = dyn_cast<ObjCProtocolDecl>(D)) {
for (auto *R = getMostRecentExistingDecl(PD); R; R = R->getPreviousDecl())
cast<ObjCProtocolDecl>(R)->Data = PD->Data;
continue;
}
auto RTD = cast<RedeclarableTemplateDecl>(D)->getCanonicalDecl();
for (auto *R = getMostRecentExistingDecl(RTD); R; R = R->getPreviousDecl())
cast<RedeclarableTemplateDecl>(R)->Common = RTD->Common;
}
PendingDefinitions.clear();
// Load the bodies of any functions or methods we've encountered. We do
// this now (delayed) so that we can be sure that the declaration chains
// have been fully wired up (hasBody relies on this).
// FIXME: We shouldn't require complete redeclaration chains here.
for (PendingBodiesMap::iterator PB = PendingBodies.begin(),
PBEnd = PendingBodies.end();
PB != PBEnd; ++PB) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(PB->first)) {
// For a function defined inline within a class template, force the
// canonical definition to be the one inside the canonical definition of
// the template. This ensures that we instantiate from a correct view
// of the template.
//
// Sadly we can't do this more generally: we can't be sure that all
// copies of an arbitrary class definition will have the same members
// defined (e.g., some member functions may not be instantiated, and some
// special members may or may not have been implicitly defined).
if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
continue;
// FIXME: Check for =delete/=default?
// FIXME: Complain about ODR violations here?
const FunctionDecl *Defn = nullptr;
if (!getContext().getLangOpts().Modules || !FD->hasBody(Defn)) {
FD->setLazyBody(PB->second);
} else {
auto *NonConstDefn = const_cast<FunctionDecl*>(Defn);
mergeDefinitionVisibility(NonConstDefn, FD);
if (!FD->isLateTemplateParsed() &&
!NonConstDefn->isLateTemplateParsed() &&
FD->getODRHash() != NonConstDefn->getODRHash()) {
if (!isa<CXXMethodDecl>(FD)) {
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
} else if (FD->getLexicalParent()->isFileContext() &&
NonConstDefn->getLexicalParent()->isFileContext()) {
// Only diagnose out-of-line method definitions. If they are
// in class definitions, then an error will be generated when
// processing the class bodies.
PendingFunctionOdrMergeFailures[FD].push_back(NonConstDefn);
}
}
}
continue;
}
ObjCMethodDecl *MD = cast<ObjCMethodDecl>(PB->first);
if (!getContext().getLangOpts().Modules || !MD->hasBody())
MD->setLazyBody(PB->second);
}
PendingBodies.clear();
// Do some cleanup.
for (auto *ND : PendingMergedDefinitionsToDeduplicate)
getContext().deduplicateMergedDefinitonsFor(ND);
PendingMergedDefinitionsToDeduplicate.clear();
}
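// Diagnose ODR violations detected while merging declarations loaded from
// different modules, describing the first difference found in each
// mismatched definition.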
void ASTReader::diagnoseOdrViolations() {
if (PendingOdrMergeFailures.empty() && PendingOdrMergeChecks.empty() &&
PendingFunctionOdrMergeFailures.empty() &&
PendingEnumOdrMergeFailures.empty())
return;
// Trigger the import of the full definition of each class that had any
// odr-merging problems, so we can produce better diagnostics for them.
// These updates may in turn find and diagnose some ODR failures, so take
// ownership of the set first.
auto OdrMergeFailures = std::move(PendingOdrMergeFailures);
PendingOdrMergeFailures.clear();
for (auto &Merge : OdrMergeFailures) {
Merge.first->buildLookup();
Merge.first->decls_begin();
Merge.first->bases_begin();
Merge.first->vbases_begin();
for (auto &RecordPair : Merge.second) {
auto *RD = RecordPair.first;
RD->decls_begin();
RD->bases_begin();
RD->vbases_begin();
}
}
// Trigger the import of functions.
auto FunctionOdrMergeFailures = std::move(PendingFunctionOdrMergeFailures);
PendingFunctionOdrMergeFailures.clear();
for (auto &Merge : FunctionOdrMergeFailures) {
Merge.first->buildLookup();
Merge.first->decls_begin();
Merge.first->getBody();
for (auto &FD : Merge.second) {
FD->buildLookup();
FD->decls_begin();
FD->getBody();
}
}
// Trigger the import of enums.
auto EnumOdrMergeFailures = std::move(PendingEnumOdrMergeFailures);
PendingEnumOdrMergeFailures.clear();
for (auto &Merge : EnumOdrMergeFailures) {
Merge.first->decls_begin();
for (auto &Enum : Merge.second) {
Enum->decls_begin();
}
}
// For each declaration from a merged context, check that the canonical
// definition of that context also contains a declaration of the same
// entity.
//
// Caution: this loop does things that might invalidate iterators into
// PendingOdrMergeChecks. Don't turn this into a range-based for loop!
while (!PendingOdrMergeChecks.empty()) {
NamedDecl *D = PendingOdrMergeChecks.pop_back_val();
// FIXME: Skip over implicit declarations for now. This matters for things
// like implicitly-declared special member functions. This isn't entirely
// correct; we can end up with multiple unmerged declarations of the same
// implicit entity.
if (D->isImplicit())
continue;
DeclContext *CanonDef = D->getDeclContext();
bool Found = false;
const Decl *DCanon = D->getCanonicalDecl();
for (auto RI : D->redecls()) {
if (RI->getLexicalDeclContext() == CanonDef) {
Found = true;
break;
}
}
if (Found)
continue;
// Quick check failed, time to do the slow thing. Note, we can't just
// look up the name of D in CanonDef here, because the member that is
// in CanonDef might not be found by name lookup (it might have been
// replaced by a more recent declaration in the lookup table), and we
// can't necessarily find it in the redeclaration chain because it might
// be merely mergeable, not redeclarable.
llvm::SmallVector<const NamedDecl*, 4> Candidates;
for (auto *CanonMember : CanonDef->decls()) {
if (CanonMember->getCanonicalDecl() == DCanon) {
// This can happen if the declaration is merely mergeable and not
// actually redeclarable (we looked for redeclarations earlier).
//
// FIXME: We should be able to detect this more efficiently, without
// pulling in all of the members of CanonDef.
Found = true;
break;
}
if (auto *ND = dyn_cast<NamedDecl>(CanonMember))
if (ND->getDeclName() == D->getDeclName())
Candidates.push_back(ND);
}
if (!Found) {
// The AST doesn't like TagDecls becoming invalid after they've been
// completed. We only really need to mark FieldDecls as invalid here.
if (!isa<TagDecl>(D))
D->setInvalidDecl();
// Ensure we don't accidentally recursively enter deserialization while
// we're producing our diagnostic.
Deserializing RecursionGuard(this);
std::string CanonDefModule =
getOwningModuleNameForDiagnostic(cast<Decl>(CanonDef));
Diag(D->getLocation(), diag::err_module_odr_violation_missing_decl)
<< D << getOwningModuleNameForDiagnostic(D)
<< CanonDef << CanonDefModule.empty() << CanonDefModule;
if (Candidates.empty())
Diag(cast<Decl>(CanonDef)->getLocation(),
diag::note_module_odr_violation_no_possible_decls) << D;
else {
for (unsigned I = 0, N = Candidates.size(); I != N; ++I)
Diag(Candidates[I]->getLocation(),
diag::note_module_odr_violation_possible_decl)
<< Candidates[I];
}
DiagnosedOdrMergeFailures.insert(CanonDef);
}
}
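// Nothing more to diagnose if no mismatched definitions were recorded.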
if (OdrMergeFailures.empty() && FunctionOdrMergeFailures.empty() &&
EnumOdrMergeFailures.empty())
return;
// Ensure we don't accidentally recursively enter deserialization while
// we're producing our diagnostics.
Deserializing RecursionGuard(this);
// Common code for hashing helpers.
ODRHash Hash;
auto ComputeQualTypeODRHash = [&Hash](QualType Ty) {
Hash.clear();
Hash.AddQualType(Ty);
return Hash.CalculateHash();
};
auto ComputeODRHash = [&Hash](const Stmt *S) {
assert(S);
Hash.clear();
Hash.AddStmt(S);
return Hash.CalculateHash();
};
auto ComputeSubDeclODRHash = [&Hash](const Decl *D) {
assert(D);
Hash.clear();
Hash.AddSubDecl(D);
return Hash.CalculateHash();
};
auto ComputeTemplateArgumentODRHash = [&Hash](const TemplateArgument &TA) {
Hash.clear();
Hash.AddTemplateArgument(TA);
return Hash.CalculateHash();
};
auto ComputeTemplateParameterListODRHash =
[&Hash](const TemplateParameterList *TPL) {
assert(TPL);
Hash.clear();
Hash.AddTemplateParameterList(TPL);
return Hash.CalculateHash();
};
// Used with err_module_odr_violation_mismatch_decl and
// note_module_odr_violation_mismatch_decl.
// This list should contain the same Decls as in ODRHash::isDeclToBeProcessed.
enum ODRMismatchDecl {
EndOfClass,
PublicSpecifer,
PrivateSpecifer,
ProtectedSpecifer,
StaticAssert,
Field,
CXXMethod,
TypeAlias,
TypeDef,
Var,
Friend,
FunctionTemplate,
Other
};
// Used with err_module_odr_violation_mismatch_decl_diff and
// note_module_odr_violation_mismatch_decl_diff
enum ODRMismatchDeclDifference {
StaticAssertCondition,
StaticAssertMessage,
StaticAssertOnlyMessage,
FieldName,
FieldTypeName,
FieldSingleBitField,
FieldDifferentWidthBitField,
FieldSingleMutable,
FieldSingleInitializer,
FieldDifferentInitializers,
MethodName,
MethodDeleted,
MethodDefaulted,
MethodVirtual,
MethodStatic,
MethodVolatile,
MethodConst,
MethodInline,
MethodNumberParameters,
MethodParameterType,
MethodParameterName,
MethodParameterSingleDefaultArgument,
MethodParameterDifferentDefaultArgument,
MethodNoTemplateArguments,
MethodDifferentNumberTemplateArguments,
MethodDifferentTemplateArgument,
MethodSingleBody,
MethodDifferentBody,
TypedefName,
TypedefType,
VarName,
VarType,
VarSingleInitializer,
VarDifferentInitializer,
VarConstexpr,
FriendTypeFunction,
FriendType,
FriendFunction,
FunctionTemplateDifferentNumberParameters,
FunctionTemplateParameterDifferentKind,
FunctionTemplateParameterName,
FunctionTemplateParameterSingleDefaultArgument,
FunctionTemplateParameterDifferentDefaultArgument,
FunctionTemplateParameterDifferentType,
FunctionTemplatePackParameter,
};
// These lambdas have the common portions of the ODR diagnostics. They
// have the same return type as Diag(), so additional parameters can be
// passed in with operator<<.
auto ODRDiagDeclError = [this](NamedDecl *FirstRecord, StringRef FirstModule,
SourceLocation Loc, SourceRange Range,
ODRMismatchDeclDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_mismatch_decl_diff)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagDeclNote = [this](StringRef SecondModule, SourceLocation Loc,
SourceRange Range, ODRMismatchDeclDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_mismatch_decl_diff)
<< SecondModule << Range << DiffType;
};
auto ODRDiagField = [this, &ODRDiagDeclError, &ODRDiagDeclNote,
&ComputeQualTypeODRHash, &ComputeODRHash](
NamedDecl *FirstRecord, StringRef FirstModule,
StringRef SecondModule, FieldDecl *FirstField,
FieldDecl *SecondField) {
IdentifierInfo *FirstII = FirstField->getIdentifier();
IdentifierInfo *SecondII = SecondField->getIdentifier();
if (FirstII->getName() != SecondII->getName()) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldName)
<< FirstII;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldName)
<< SecondII;
return true;
}
assert(getContext().hasSameType(FirstField->getType(),
SecondField->getType()));
QualType FirstType = FirstField->getType();
QualType SecondType = SecondField->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldTypeName)
<< FirstII << FirstType;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldTypeName)
<< SecondII << SecondType;
return true;
}
const bool IsFirstBitField = FirstField->isBitField();
const bool IsSecondBitField = SecondField->isBitField();
if (IsFirstBitField != IsSecondBitField) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleBitField)
<< FirstII << IsFirstBitField;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleBitField)
<< SecondII << IsSecondBitField;
return true;
}
if (IsFirstBitField && IsSecondBitField) {
unsigned FirstBitWidthHash =
ComputeODRHash(FirstField->getBitWidth());
unsigned SecondBitWidthHash =
ComputeODRHash(SecondField->getBitWidth());
if (FirstBitWidthHash != SecondBitWidthHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(),
FieldDifferentWidthBitField)
<< FirstII << FirstField->getBitWidth()->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(),
FieldDifferentWidthBitField)
<< SecondII << SecondField->getBitWidth()->getSourceRange();
return true;
}
}
if (!PP.getLangOpts().CPlusPlus)
return false;
const bool IsFirstMutable = FirstField->isMutable();
const bool IsSecondMutable = SecondField->isMutable();
if (IsFirstMutable != IsSecondMutable) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleMutable)
<< FirstII << IsFirstMutable;
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleMutable)
<< SecondII << IsSecondMutable;
return true;
}
const Expr *FirstInitializer = FirstField->getInClassInitializer();
const Expr *SecondInitializer = SecondField->getInClassInitializer();
if ((!FirstInitializer && SecondInitializer) ||
(FirstInitializer && !SecondInitializer)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(), FieldSingleInitializer)
<< FirstII << (FirstInitializer != nullptr);
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(), FieldSingleInitializer)
<< SecondII << (SecondInitializer != nullptr);
return true;
}
if (FirstInitializer && SecondInitializer) {
unsigned FirstInitHash = ComputeODRHash(FirstInitializer);
unsigned SecondInitHash = ComputeODRHash(SecondInitializer);
if (FirstInitHash != SecondInitHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstField->getLocation(),
FirstField->getSourceRange(),
FieldDifferentInitializers)
<< FirstII << FirstInitializer->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondField->getLocation(),
SecondField->getSourceRange(),
FieldDifferentInitializers)
<< SecondII << SecondInitializer->getSourceRange();
return true;
}
}
return false;
};
auto ODRDiagTypeDefOrAlias =
[&ODRDiagDeclError, &ODRDiagDeclNote, &ComputeQualTypeODRHash](
NamedDecl *FirstRecord, StringRef FirstModule, StringRef SecondModule,
TypedefNameDecl *FirstTD, TypedefNameDecl *SecondTD,
bool IsTypeAlias) {
auto FirstName = FirstTD->getDeclName();
auto SecondName = SecondTD->getDeclName();
if (FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstTD->getLocation(),
FirstTD->getSourceRange(), TypedefName)
<< IsTypeAlias << FirstName;
ODRDiagDeclNote(SecondModule, SecondTD->getLocation(),
SecondTD->getSourceRange(), TypedefName)
<< IsTypeAlias << SecondName;
return true;
}
QualType FirstType = FirstTD->getUnderlyingType();
QualType SecondType = SecondTD->getUnderlyingType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstTD->getLocation(),
FirstTD->getSourceRange(), TypedefType)
<< IsTypeAlias << FirstName << FirstType;
ODRDiagDeclNote(SecondModule, SecondTD->getLocation(),
SecondTD->getSourceRange(), TypedefType)
<< IsTypeAlias << SecondName << SecondType;
return true;
}
return false;
};
auto ODRDiagVar = [&ODRDiagDeclError, &ODRDiagDeclNote,
&ComputeQualTypeODRHash, &ComputeODRHash,
this](NamedDecl *FirstRecord, StringRef FirstModule,
StringRef SecondModule, VarDecl *FirstVD,
VarDecl *SecondVD) {
auto FirstName = FirstVD->getDeclName();
auto SecondName = SecondVD->getDeclName();
if (FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarName)
<< FirstName;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarName)
<< SecondName;
return true;
}
QualType FirstType = FirstVD->getType();
QualType SecondType = SecondVD->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarType)
<< FirstName << FirstType;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarType)
<< SecondName << SecondType;
return true;
}
if (!PP.getLangOpts().CPlusPlus)
return false;
const Expr *FirstInit = FirstVD->getInit();
const Expr *SecondInit = SecondVD->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarSingleInitializer)
<< FirstName << (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarSingleInitializer)
<< SecondName << (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
return true;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarDifferentInitializer)
<< FirstName << FirstInit->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarDifferentInitializer)
<< SecondName << SecondInit->getSourceRange();
return true;
}
const bool FirstIsConstexpr = FirstVD->isConstexpr();
const bool SecondIsConstexpr = SecondVD->isConstexpr();
if (FirstIsConstexpr != SecondIsConstexpr) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstVD->getLocation(),
FirstVD->getSourceRange(), VarConstexpr)
<< FirstName << FirstIsConstexpr;
ODRDiagDeclNote(SecondModule, SecondVD->getLocation(),
SecondVD->getSourceRange(), VarConstexpr)
<< SecondName << SecondIsConstexpr;
return true;
}
return false;
};
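// Map a member Decl to the ODRMismatchDecl kind used in the mismatch
// diagnostics below.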
auto DifferenceSelector = [](Decl *D) {
assert(D && "valid Decl required");
switch (D->getKind()) {
default:
return Other;
case Decl::AccessSpec:
switch (D->getAccess()) {
case AS_public:
return PublicSpecifer;
case AS_private:
return PrivateSpecifer;
case AS_protected:
return ProtectedSpecifer;
case AS_none:
break;
}
llvm_unreachable("Invalid access specifier");
case Decl::StaticAssert:
return StaticAssert;
case Decl::Field:
return Field;
case Decl::CXXMethod:
case Decl::CXXConstructor:
case Decl::CXXDestructor:
return CXXMethod;
case Decl::TypeAlias:
return TypeAlias;
case Decl::Typedef:
return TypeDef;
case Decl::Var:
return Var;
case Decl::Friend:
return Friend;
case Decl::FunctionTemplate:
return FunctionTemplate;
}
};
using DeclHashes = llvm::SmallVector<std::pair<Decl *, unsigned>, 4>;
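// Collect the ODR hash of every hashable declaration in a record, in
// declaration order.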
auto PopulateHashes = [&ComputeSubDeclODRHash](DeclHashes &Hashes,
RecordDecl *Record,
const DeclContext *DC) {
for (auto *D : Record->decls()) {
if (!ODRHash::isDeclToBeProcessed(D, DC))
continue;
Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
}
};
struct DiffResult {
Decl *FirstDecl = nullptr, *SecondDecl = nullptr;
ODRMismatchDecl FirstDiffType = Other, SecondDiffType = Other;
};
// If there is a diagnosable difference, FirstDiffType and
// SecondDiffType will not be Other, and FirstDecl and SecondDecl will be
// filled in unless the difference is EndOfClass.
auto FindTypeDiffs = [&DifferenceSelector](DeclHashes &FirstHashes,
DeclHashes &SecondHashes) {
DiffResult DR;
auto FirstIt = FirstHashes.begin();
auto SecondIt = SecondHashes.begin();
while (FirstIt != FirstHashes.end() || SecondIt != SecondHashes.end()) {
if (FirstIt != FirstHashes.end() && SecondIt != SecondHashes.end() &&
FirstIt->second == SecondIt->second) {
++FirstIt;
++SecondIt;
continue;
}
DR.FirstDecl = FirstIt == FirstHashes.end() ? nullptr : FirstIt->first;
DR.SecondDecl =
SecondIt == SecondHashes.end() ? nullptr : SecondIt->first;
DR.FirstDiffType =
DR.FirstDecl ? DifferenceSelector(DR.FirstDecl) : EndOfClass;
DR.SecondDiffType =
DR.SecondDecl ? DifferenceSelector(DR.SecondDecl) : EndOfClass;
return DR;
}
return DR;
};
// Use this to diagnose that an unexpected Decl was encountered
// or no difference was detected. This causes a generic error
// message to be emitted.
auto DiagnoseODRUnexpected = [this](DiffResult &DR, NamedDecl *FirstRecord,
StringRef FirstModule,
NamedDecl *SecondRecord,
StringRef SecondModule) {
Diag(FirstRecord->getLocation(),
diag::err_module_odr_violation_different_definitions)
<< FirstRecord << FirstModule.empty() << FirstModule;
if (DR.FirstDecl) {
Diag(DR.FirstDecl->getLocation(), diag::note_first_module_difference)
<< FirstRecord << DR.FirstDecl->getSourceRange();
}
Diag(SecondRecord->getLocation(),
diag::note_module_odr_violation_different_definitions)
<< SecondModule;
if (DR.SecondDecl) {
Diag(DR.SecondDecl->getLocation(), diag::note_second_module_difference)
<< DR.SecondDecl->getSourceRange();
}
};
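// Diagnose that the two definitions diverge at declarations of different
// kinds (or that one definition ends before the other).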
auto DiagnoseODRMismatch =
[this](DiffResult &DR, NamedDecl *FirstRecord, StringRef FirstModule,
NamedDecl *SecondRecord, StringRef SecondModule) {
SourceLocation FirstLoc;
SourceRange FirstRange;
auto *FirstTag = dyn_cast<TagDecl>(FirstRecord);
if (DR.FirstDiffType == EndOfClass && FirstTag) {
FirstLoc = FirstTag->getBraceRange().getEnd();
} else {
FirstLoc = DR.FirstDecl->getLocation();
FirstRange = DR.FirstDecl->getSourceRange();
}
Diag(FirstLoc, diag::err_module_odr_violation_mismatch_decl)
<< FirstRecord << FirstModule.empty() << FirstModule << FirstRange
<< DR.FirstDiffType;
SourceLocation SecondLoc;
SourceRange SecondRange;
auto *SecondTag = dyn_cast<TagDecl>(SecondRecord);
if (DR.SecondDiffType == EndOfClass && SecondTag) {
SecondLoc = SecondTag->getBraceRange().getEnd();
} else {
SecondLoc = DR.SecondDecl->getLocation();
SecondRange = DR.SecondDecl->getSourceRange();
}
Diag(SecondLoc, diag::note_module_odr_violation_mismatch_decl)
<< SecondModule << SecondRange << DR.SecondDiffType;
};
// Issue any pending ODR-failure diagnostics.
for (auto &Merge : OdrMergeFailures) {
// If we've already pointed out a specific problem with this class, don't
// bother issuing a general "something's different" diagnostic.
if (!DiagnosedOdrMergeFailures.insert(Merge.first).second)
continue;
bool Diagnosed = false;
CXXRecordDecl *FirstRecord = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstRecord);
for (auto &RecordPair : Merge.second) {
CXXRecordDecl *SecondRecord = RecordPair.first;
// Multiple different declarations got merged together; tell the user
// where they came from.
if (FirstRecord == SecondRecord)
continue;
std::string SecondModule = getOwningModuleNameForDiagnostic(SecondRecord);
auto *FirstDD = FirstRecord->DefinitionData;
auto *SecondDD = RecordPair.second;
assert(FirstDD && SecondDD && "Definitions without DefinitionData");
// Diagnostics from DefinitionData are emitted here.
if (FirstDD != SecondDD) {
enum ODRDefinitionDataDifference {
NumBases,
NumVBases,
BaseType,
BaseVirtual,
BaseAccess,
};
auto ODRDiagBaseError = [FirstRecord, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODRDefinitionDataDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_definition_data)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagBaseNote = [&SecondModule,
this](SourceLocation Loc, SourceRange Range,
ODRDefinitionDataDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_definition_data)
<< SecondModule << Range << DiffType;
};
unsigned FirstNumBases = FirstDD->NumBases;
unsigned FirstNumVBases = FirstDD->NumVBases;
unsigned SecondNumBases = SecondDD->NumBases;
unsigned SecondNumVBases = SecondDD->NumVBases;
auto GetSourceRange = [](struct CXXRecordDecl::DefinitionData *DD) {
unsigned NumBases = DD->NumBases;
if (NumBases == 0) return SourceRange();
auto bases = DD->bases();
return SourceRange(bases[0].getBeginLoc(),
bases[NumBases - 1].getEndLoc());
};
if (FirstNumBases != SecondNumBases) {
ODRDiagBaseError(FirstRecord->getLocation(), GetSourceRange(FirstDD),
NumBases)
<< FirstNumBases;
ODRDiagBaseNote(SecondRecord->getLocation(), GetSourceRange(SecondDD),
NumBases)
<< SecondNumBases;
Diagnosed = true;
break;
}
if (FirstNumVBases != SecondNumVBases) {
ODRDiagBaseError(FirstRecord->getLocation(), GetSourceRange(FirstDD),
NumVBases)
<< FirstNumVBases;
ODRDiagBaseNote(SecondRecord->getLocation(), GetSourceRange(SecondDD),
NumVBases)
<< SecondNumVBases;
Diagnosed = true;
break;
}
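// Compare the base classes pairwise: type, virtualness, and the access
// specifier as written.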
auto FirstBases = FirstDD->bases();
auto SecondBases = SecondDD->bases();
unsigned i = 0;
for (i = 0; i < FirstNumBases; ++i) {
auto FirstBase = FirstBases[i];
auto SecondBase = SecondBases[i];
if (ComputeQualTypeODRHash(FirstBase.getType()) !=
ComputeQualTypeODRHash(SecondBase.getType())) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseType)
<< (i + 1) << FirstBase.getType();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseType)
<< (i + 1) << SecondBase.getType();
break;
}
if (FirstBase.isVirtual() != SecondBase.isVirtual()) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseVirtual)
<< (i + 1) << FirstBase.isVirtual() << FirstBase.getType();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseVirtual)
<< (i + 1) << SecondBase.isVirtual() << SecondBase.getType();
break;
}
if (FirstBase.getAccessSpecifierAsWritten() !=
SecondBase.getAccessSpecifierAsWritten()) {
ODRDiagBaseError(FirstRecord->getLocation(),
FirstBase.getSourceRange(), BaseAccess)
<< (i + 1) << FirstBase.getType()
<< (int)FirstBase.getAccessSpecifierAsWritten();
ODRDiagBaseNote(SecondRecord->getLocation(),
SecondBase.getSourceRange(), BaseAccess)
<< (i + 1) << SecondBase.getType()
<< (int)SecondBase.getAccessSpecifierAsWritten();
break;
}
}
if (i != FirstNumBases) {
Diagnosed = true;
break;
}
}
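// Next, compare the template parameter lists of the described class
// templates, if this record is a class template.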
const ClassTemplateDecl *FirstTemplate =
FirstRecord->getDescribedClassTemplate();
const ClassTemplateDecl *SecondTemplate =
SecondRecord->getDescribedClassTemplate();
assert(!FirstTemplate == !SecondTemplate &&
"Both pointers should be null or non-null");
enum ODRTemplateDifference {
ParamEmptyName,
ParamName,
ParamSingleDefaultArgument,
ParamDifferentDefaultArgument,
};
if (FirstTemplate && SecondTemplate) {
DeclHashes FirstTemplateHashes;
DeclHashes SecondTemplateHashes;
auto PopulateTemplateParameterHashs =
[&ComputeSubDeclODRHash](DeclHashes &Hashes,
const ClassTemplateDecl *TD) {
for (auto *D : TD->getTemplateParameters()->asArray()) {
Hashes.emplace_back(D, ComputeSubDeclODRHash(D));
}
};
PopulateTemplateParameterHashs(FirstTemplateHashes, FirstTemplate);
PopulateTemplateParameterHashs(SecondTemplateHashes, SecondTemplate);
assert(FirstTemplateHashes.size() == SecondTemplateHashes.size() &&
"Number of template parameters should be equal.");
auto FirstIt = FirstTemplateHashes.begin();
auto FirstEnd = FirstTemplateHashes.end();
auto SecondIt = SecondTemplateHashes.begin();
for (; FirstIt != FirstEnd; ++FirstIt, ++SecondIt) {
if (FirstIt->second == SecondIt->second)
continue;
auto ODRDiagTemplateError = [FirstRecord, &FirstModule, this](
SourceLocation Loc, SourceRange Range,
ODRTemplateDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_template_parameter)
<< FirstRecord << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagTemplateNote = [&SecondModule, this](
SourceLocation Loc, SourceRange Range,
ODRTemplateDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_template_parameter)
<< SecondModule << Range << DiffType;
};
const NamedDecl* FirstDecl = cast<NamedDecl>(FirstIt->first);
const NamedDecl* SecondDecl = cast<NamedDecl>(SecondIt->first);
assert(FirstDecl->getKind() == SecondDecl->getKind() &&
"Parameter Decl's should be the same kind.");
DeclarationName FirstName = FirstDecl->getDeclName();
DeclarationName SecondName = SecondDecl->getDeclName();
if (FirstName != SecondName) {
const bool FirstNameEmpty =
FirstName.isIdentifier() && !FirstName.getAsIdentifierInfo();
const bool SecondNameEmpty =
SecondName.isIdentifier() && !SecondName.getAsIdentifierInfo();
assert((!FirstNameEmpty || !SecondNameEmpty) &&
"Both template parameters cannot be unnamed.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
FirstNameEmpty ? ParamEmptyName : ParamName)
<< FirstName;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
SecondNameEmpty ? ParamEmptyName : ParamName)
<< SecondName;
break;
}
switch (FirstDecl->getKind()) {
default:
llvm_unreachable("Invalid template parameter type.");
case Decl::TemplateTypeParm: {
const auto *FirstParam = cast<TemplateTypeParmDecl>(FirstDecl);
const auto *SecondParam = cast<TemplateTypeParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
case Decl::NonTypeTemplateParm: {
const auto *FirstParam = cast<NonTypeTemplateParmDecl>(FirstDecl);
const auto *SecondParam = cast<NonTypeTemplateParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
case Decl::TemplateTemplateParm: {
const auto *FirstParam = cast<TemplateTemplateParmDecl>(FirstDecl);
const auto *SecondParam =
cast<TemplateTemplateParmDecl>(SecondDecl);
const bool HasFirstDefaultArgument =
FirstParam->hasDefaultArgument() &&
!FirstParam->defaultArgumentWasInherited();
const bool HasSecondDefaultArgument =
SecondParam->hasDefaultArgument() &&
!SecondParam->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasFirstDefaultArgument;
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamSingleDefaultArgument)
<< HasSecondDefaultArgument;
break;
}
assert(HasFirstDefaultArgument && HasSecondDefaultArgument &&
"Expecting default arguments.");
ODRDiagTemplateError(FirstDecl->getLocation(),
FirstDecl->getSourceRange(),
ParamDifferentDefaultArgument);
ODRDiagTemplateNote(SecondDecl->getLocation(),
SecondDecl->getSourceRange(),
ParamDifferentDefaultArgument);
break;
}
}
break;
}
if (FirstIt != FirstEnd) {
Diagnosed = true;
break;
}
}
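// Hash the member declarations of both definitions and walk the two lists
// in lockstep to find the first mismatching declaration.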
DeclHashes FirstHashes;
DeclHashes SecondHashes;
const DeclContext *DC = FirstRecord;
PopulateHashes(FirstHashes, FirstRecord, DC);
PopulateHashes(SecondHashes, SecondRecord, DC);
auto DR = FindTypeDiffs(FirstHashes, SecondHashes);
ODRMismatchDecl FirstDiffType = DR.FirstDiffType;
ODRMismatchDecl SecondDiffType = DR.SecondDiffType;
Decl *FirstDecl = DR.FirstDecl;
Decl *SecondDecl = DR.SecondDecl;
if (FirstDiffType == Other || SecondDiffType == Other) {
DiagnoseODRUnexpected(DR, FirstRecord, FirstModule, SecondRecord,
SecondModule);
Diagnosed = true;
break;
}
if (FirstDiffType != SecondDiffType) {
DiagnoseODRMismatch(DR, FirstRecord, FirstModule, SecondRecord,
SecondModule);
Diagnosed = true;
break;
}
assert(FirstDiffType == SecondDiffType);
switch (FirstDiffType) {
case Other:
case EndOfClass:
case PublicSpecifer:
case PrivateSpecifer:
case ProtectedSpecifer:
llvm_unreachable("Invalid diff type");
case StaticAssert: {
StaticAssertDecl *FirstSA = cast<StaticAssertDecl>(FirstDecl);
StaticAssertDecl *SecondSA = cast<StaticAssertDecl>(SecondDecl);
Expr *FirstExpr = FirstSA->getAssertExpr();
Expr *SecondExpr = SecondSA->getAssertExpr();
unsigned FirstODRHash = ComputeODRHash(FirstExpr);
unsigned SecondODRHash = ComputeODRHash(SecondExpr);
if (FirstODRHash != SecondODRHash) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstExpr->getBeginLoc(),
FirstExpr->getSourceRange(), StaticAssertCondition);
ODRDiagDeclNote(SecondModule, SecondExpr->getBeginLoc(),
SecondExpr->getSourceRange(), StaticAssertCondition);
Diagnosed = true;
break;
}
StringLiteral *FirstStr = FirstSA->getMessage();
StringLiteral *SecondStr = SecondSA->getMessage();
assert((FirstStr || SecondStr) && "Both messages cannot be empty");
if ((FirstStr && !SecondStr) || (!FirstStr && SecondStr)) {
SourceLocation FirstLoc, SecondLoc;
SourceRange FirstRange, SecondRange;
if (FirstStr) {
FirstLoc = FirstStr->getBeginLoc();
FirstRange = FirstStr->getSourceRange();
} else {
FirstLoc = FirstSA->getBeginLoc();
FirstRange = FirstSA->getSourceRange();
}
if (SecondStr) {
SecondLoc = SecondStr->getBeginLoc();
SecondRange = SecondStr->getSourceRange();
} else {
SecondLoc = SecondSA->getBeginLoc();
SecondRange = SecondSA->getSourceRange();
}
ODRDiagDeclError(FirstRecord, FirstModule, FirstLoc, FirstRange,
StaticAssertOnlyMessage)
<< (FirstStr == nullptr);
ODRDiagDeclNote(SecondModule, SecondLoc, SecondRange,
StaticAssertOnlyMessage)
<< (SecondStr == nullptr);
Diagnosed = true;
break;
}
if (FirstStr && SecondStr &&
FirstStr->getString() != SecondStr->getString()) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstStr->getBeginLoc(),
FirstStr->getSourceRange(), StaticAssertMessage);
ODRDiagDeclNote(SecondModule, SecondStr->getBeginLoc(),
SecondStr->getSourceRange(), StaticAssertMessage);
Diagnosed = true;
break;
}
break;
}
case Field: {
Diagnosed = ODRDiagField(FirstRecord, FirstModule, SecondModule,
cast<FieldDecl>(FirstDecl),
cast<FieldDecl>(SecondDecl));
break;
}
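// Methods are compared piecewise: kind and name, deleted/defaulted,
// virtual/pure, storage class, volatile/const/inline, parameters,
// template arguments, and finally body presence and body hash.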
case CXXMethod: {
enum {
DiagMethod,
DiagConstructor,
DiagDestructor,
} FirstMethodType,
SecondMethodType;
auto GetMethodTypeForDiagnostics = [](const CXXMethodDecl* D) {
if (isa<CXXConstructorDecl>(D)) return DiagConstructor;
if (isa<CXXDestructorDecl>(D)) return DiagDestructor;
return DiagMethod;
};
const CXXMethodDecl *FirstMethod = cast<CXXMethodDecl>(FirstDecl);
const CXXMethodDecl *SecondMethod = cast<CXXMethodDecl>(SecondDecl);
FirstMethodType = GetMethodTypeForDiagnostics(FirstMethod);
SecondMethodType = GetMethodTypeForDiagnostics(SecondMethod);
auto FirstName = FirstMethod->getDeclName();
auto SecondName = SecondMethod->getDeclName();
if (FirstMethodType != SecondMethodType || FirstName != SecondName) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodName)
<< FirstMethodType << FirstName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodName)
<< SecondMethodType << SecondName;
Diagnosed = true;
break;
}
const bool FirstDeleted = FirstMethod->isDeletedAsWritten();
const bool SecondDeleted = SecondMethod->isDeletedAsWritten();
if (FirstDeleted != SecondDeleted) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDeleted)
<< FirstMethodType << FirstName << FirstDeleted;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDeleted)
<< SecondMethodType << SecondName << SecondDeleted;
Diagnosed = true;
break;
}
const bool FirstDefaulted = FirstMethod->isExplicitlyDefaulted();
const bool SecondDefaulted = SecondMethod->isExplicitlyDefaulted();
if (FirstDefaulted != SecondDefaulted) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDefaulted)
<< FirstMethodType << FirstName << FirstDefaulted;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDefaulted)
<< SecondMethodType << SecondName << SecondDefaulted;
Diagnosed = true;
break;
}
const bool FirstVirtual = FirstMethod->isVirtualAsWritten();
const bool SecondVirtual = SecondMethod->isVirtualAsWritten();
const bool FirstPure = FirstMethod->isPure();
const bool SecondPure = SecondMethod->isPure();
if ((FirstVirtual || SecondVirtual) &&
(FirstVirtual != SecondVirtual || FirstPure != SecondPure)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodVirtual)
<< FirstMethodType << FirstName << FirstPure << FirstVirtual;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodVirtual)
<< SecondMethodType << SecondName << SecondPure << SecondVirtual;
Diagnosed = true;
break;
}
// CXXMethodDecl::isStatic uses the canonical Decl. With Decl merging,
// FirstDecl is the canonical Decl of SecondDecl, so the storage
// class needs to be checked instead.
const auto FirstStorage = FirstMethod->getStorageClass();
const auto SecondStorage = SecondMethod->getStorageClass();
const bool FirstStatic = FirstStorage == SC_Static;
const bool SecondStatic = SecondStorage == SC_Static;
if (FirstStatic != SecondStatic) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodStatic)
<< FirstMethodType << FirstName << FirstStatic;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodStatic)
<< SecondMethodType << SecondName << SecondStatic;
Diagnosed = true;
break;
}
const bool FirstVolatile = FirstMethod->isVolatile();
const bool SecondVolatile = SecondMethod->isVolatile();
if (FirstVolatile != SecondVolatile) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodVolatile)
<< FirstMethodType << FirstName << FirstVolatile;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodVolatile)
<< SecondMethodType << SecondName << SecondVolatile;
Diagnosed = true;
break;
}
const bool FirstConst = FirstMethod->isConst();
const bool SecondConst = SecondMethod->isConst();
if (FirstConst != SecondConst) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodConst)
<< FirstMethodType << FirstName << FirstConst;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodConst)
<< SecondMethodType << SecondName << SecondConst;
Diagnosed = true;
break;
}
const bool FirstInline = FirstMethod->isInlineSpecified();
const bool SecondInline = SecondMethod->isInlineSpecified();
if (FirstInline != SecondInline) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodInline)
<< FirstMethodType << FirstName << FirstInline;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodInline)
<< SecondMethodType << SecondName << SecondInline;
Diagnosed = true;
break;
}
const unsigned FirstNumParameters = FirstMethod->param_size();
const unsigned SecondNumParameters = SecondMethod->param_size();
if (FirstNumParameters != SecondNumParameters) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodNumberParameters)
<< FirstMethodType << FirstName << FirstNumParameters;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodNumberParameters)
<< SecondMethodType << SecondName << SecondNumParameters;
Diagnosed = true;
break;
}
// Need this status boolean to know when to break out of the switch.
bool ParameterMismatch = false;
for (unsigned I = 0; I < FirstNumParameters; ++I) {
const ParmVarDecl *FirstParam = FirstMethod->getParamDecl(I);
const ParmVarDecl *SecondParam = SecondMethod->getParamDecl(I);
QualType FirstParamType = FirstParam->getType();
QualType SecondParamType = SecondParam->getType();
if (FirstParamType != SecondParamType &&
ComputeQualTypeODRHash(FirstParamType) !=
ComputeQualTypeODRHash(SecondParamType)) {
if (const DecayedType *ParamDecayedType =
FirstParamType->getAs<DecayedType>()) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterType)
<< FirstMethodType << FirstName << (I + 1) << FirstParamType
<< true << ParamDecayedType->getOriginalType();
} else {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterType)
<< FirstMethodType << FirstName << (I + 1) << FirstParamType
<< false;
}
if (const DecayedType *ParamDecayedType =
SecondParamType->getAs<DecayedType>()) {
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterType)
<< SecondMethodType << SecondName << (I + 1)
<< SecondParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterType)
<< SecondMethodType << SecondName << (I + 1)
<< SecondParamType << false;
}
ParameterMismatch = true;
break;
}
DeclarationName FirstParamName = FirstParam->getDeclName();
DeclarationName SecondParamName = SecondParam->getDeclName();
if (FirstParamName != SecondParamName) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodParameterName)
<< FirstMethodType << FirstName << (I + 1) << FirstParamName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodParameterName)
<< SecondMethodType << SecondName << (I + 1) << SecondParamName;
ParameterMismatch = true;
break;
}
const Expr *FirstInit = FirstParam->getInit();
const Expr *SecondInit = SecondParam->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodParameterSingleDefaultArgument)
<< FirstMethodType << FirstName << (I + 1)
<< (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterSingleDefaultArgument)
<< SecondMethodType << SecondName << (I + 1)
<< (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
ParameterMismatch = true;
break;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodParameterDifferentDefaultArgument)
<< FirstMethodType << FirstName << (I + 1)
<< FirstInit->getSourceRange();
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodParameterDifferentDefaultArgument)
<< SecondMethodType << SecondName << (I + 1)
<< SecondInit->getSourceRange();
ParameterMismatch = true;
break;
}
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
const auto *FirstTemplateArgs =
FirstMethod->getTemplateSpecializationArgs();
const auto *SecondTemplateArgs =
SecondMethod->getTemplateSpecializationArgs();
if ((FirstTemplateArgs && !SecondTemplateArgs) ||
(!FirstTemplateArgs && SecondTemplateArgs)) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodNoTemplateArguments)
<< FirstMethodType << FirstName << (FirstTemplateArgs != nullptr);
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodNoTemplateArguments)
<< SecondMethodType << SecondName
<< (SecondTemplateArgs != nullptr);
Diagnosed = true;
break;
}
if (FirstTemplateArgs && SecondTemplateArgs) {
// Remove pack expansions from the argument list.
auto ExpandTemplateArgumentList =
[](const TemplateArgumentList *TAL) {
llvm::SmallVector<const TemplateArgument *, 8> ExpandedList;
for (const TemplateArgument &TA : TAL->asArray()) {
if (TA.getKind() != TemplateArgument::Pack) {
ExpandedList.push_back(&TA);
continue;
}
for (const TemplateArgument &PackTA : TA.getPackAsArray()) {
ExpandedList.push_back(&PackTA);
}
}
return ExpandedList;
};
llvm::SmallVector<const TemplateArgument *, 8> FirstExpandedList =
ExpandTemplateArgumentList(FirstTemplateArgs);
llvm::SmallVector<const TemplateArgument *, 8> SecondExpandedList =
ExpandTemplateArgumentList(SecondTemplateArgs);
if (FirstExpandedList.size() != SecondExpandedList.size()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstMethod->getLocation(),
FirstMethod->getSourceRange(),
MethodDifferentNumberTemplateArguments)
<< FirstMethodType << FirstName
<< (unsigned)FirstExpandedList.size();
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodDifferentNumberTemplateArguments)
<< SecondMethodType << SecondName
<< (unsigned)SecondExpandedList.size();
Diagnosed = true;
break;
}
bool TemplateArgumentMismatch = false;
for (unsigned i = 0, e = FirstExpandedList.size(); i != e; ++i) {
const TemplateArgument &FirstTA = *FirstExpandedList[i],
&SecondTA = *SecondExpandedList[i];
if (ComputeTemplateArgumentODRHash(FirstTA) ==
ComputeTemplateArgumentODRHash(SecondTA)) {
continue;
}
ODRDiagDeclError(
FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDifferentTemplateArgument)
<< FirstMethodType << FirstName << FirstTA << i + 1;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(),
MethodDifferentTemplateArgument)
<< SecondMethodType << SecondName << SecondTA << i + 1;
TemplateArgumentMismatch = true;
break;
}
if (TemplateArgumentMismatch) {
Diagnosed = true;
break;
}
}
// Compute the hash of the method as if it had no body.
auto ComputeCXXMethodODRHash = [&Hash](const CXXMethodDecl *D) {
Hash.clear();
Hash.AddFunctionDecl(D, true /*SkipBody*/);
return Hash.CalculateHash();
};
// Compare the generated hash to the stored hash. A difference means
// that a body was present in the original source. Due to merging,
// the standard way of detecting a body will not work.
const bool HasFirstBody =
ComputeCXXMethodODRHash(FirstMethod) != FirstMethod->getODRHash();
const bool HasSecondBody =
ComputeCXXMethodODRHash(SecondMethod) != SecondMethod->getODRHash();
if (HasFirstBody != HasSecondBody) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodSingleBody)
<< FirstMethodType << FirstName << HasFirstBody;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodSingleBody)
<< SecondMethodType << SecondName << HasSecondBody;
Diagnosed = true;
break;
}
if (HasFirstBody && HasSecondBody) {
ODRDiagDeclError(FirstRecord, FirstModule, FirstMethod->getLocation(),
FirstMethod->getSourceRange(), MethodDifferentBody)
<< FirstMethodType << FirstName;
ODRDiagDeclNote(SecondModule, SecondMethod->getLocation(),
SecondMethod->getSourceRange(), MethodDifferentBody)
<< SecondMethodType << SecondName;
Diagnosed = true;
break;
}
break;
}
case TypeAlias:
case TypeDef: {
Diagnosed = ODRDiagTypeDefOrAlias(
FirstRecord, FirstModule, SecondModule,
cast<TypedefNameDecl>(FirstDecl), cast<TypedefNameDecl>(SecondDecl),
FirstDiffType == TypeAlias);
break;
}
case Var: {
Diagnosed =
ODRDiagVar(FirstRecord, FirstModule, SecondModule,
cast<VarDecl>(FirstDecl), cast<VarDecl>(SecondDecl));
break;
}
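// A friend is either a named declaration (friend function/class) or a
// friend type; diagnose whichever form differs between the definitions.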
case Friend: {
FriendDecl *FirstFriend = cast<FriendDecl>(FirstDecl);
FriendDecl *SecondFriend = cast<FriendDecl>(SecondDecl);
NamedDecl *FirstND = FirstFriend->getFriendDecl();
NamedDecl *SecondND = SecondFriend->getFriendDecl();
TypeSourceInfo *FirstTSI = FirstFriend->getFriendType();
TypeSourceInfo *SecondTSI = SecondFriend->getFriendType();
if (FirstND && SecondND) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendFunction)
<< FirstND;
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendFunction)
<< SecondND;
Diagnosed = true;
break;
}
if (FirstTSI && SecondTSI) {
QualType FirstFriendType = FirstTSI->getType();
QualType SecondFriendType = SecondTSI->getType();
assert(ComputeQualTypeODRHash(FirstFriendType) !=
ComputeQualTypeODRHash(SecondFriendType));
ODRDiagDeclError(FirstRecord, FirstModule,
FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendType)
<< FirstFriendType;
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendType)
<< SecondFriendType;
Diagnosed = true;
break;
}
ODRDiagDeclError(FirstRecord, FirstModule, FirstFriend->getFriendLoc(),
FirstFriend->getSourceRange(), FriendTypeFunction)
<< (FirstTSI == nullptr);
ODRDiagDeclNote(SecondModule, SecondFriend->getFriendLoc(),
SecondFriend->getSourceRange(), FriendTypeFunction)
<< (SecondTSI == nullptr);
Diagnosed = true;
break;
}
case FunctionTemplate: {
FunctionTemplateDecl *FirstTemplate =
cast<FunctionTemplateDecl>(FirstDecl);
FunctionTemplateDecl *SecondTemplate =
cast<FunctionTemplateDecl>(SecondDecl);
TemplateParameterList *FirstTPL =
FirstTemplate->getTemplateParameters();
TemplateParameterList *SecondTPL =
SecondTemplate->getTemplateParameters();
if (FirstTPL->size() != SecondTPL->size()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateDifferentNumberParameters)
<< FirstTemplate << FirstTPL->size();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateDifferentNumberParameters)
<< SecondTemplate << SecondTPL->size();
Diagnosed = true;
break;
}
bool ParameterMismatch = false;
for (unsigned i = 0, e = FirstTPL->size(); i != e; ++i) {
NamedDecl *FirstParam = FirstTPL->getParam(i);
NamedDecl *SecondParam = SecondTPL->getParam(i);
if (FirstParam->getKind() != SecondParam->getKind()) {
enum {
TemplateTypeParameter,
NonTypeTemplateParameter,
TemplateTemplateParameter,
};
auto GetParamType = [](NamedDecl *D) {
switch (D->getKind()) {
default:
llvm_unreachable("Unexpected template parameter type");
case Decl::TemplateTypeParm:
return TemplateTypeParameter;
case Decl::NonTypeTemplateParm:
return NonTypeTemplateParameter;
case Decl::TemplateTemplateParm:
return TemplateTemplateParameter;
}
};
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentKind)
<< FirstTemplate << (i + 1) << GetParamType(FirstParam);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentKind)
<< SecondTemplate << (i + 1) << GetParamType(SecondParam);
ParameterMismatch = true;
break;
}
if (FirstParam->getName() != SecondParam->getName()) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(), FunctionTemplateParameterName)
<< FirstTemplate << (i + 1) << (bool)FirstParam->getIdentifier()
<< FirstParam;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterName)
<< SecondTemplate << (i + 1)
<< (bool)SecondParam->getIdentifier() << SecondParam;
ParameterMismatch = true;
break;
}
if (isa<TemplateTypeParmDecl>(FirstParam) &&
isa<TemplateTypeParmDecl>(SecondParam)) {
TemplateTypeParmDecl *FirstTTPD =
cast<TemplateTypeParmDecl>(FirstParam);
TemplateTypeParmDecl *SecondTTPD =
cast<TemplateTypeParmDecl>(SecondParam);
bool HasFirstDefaultArgument =
FirstTTPD->hasDefaultArgument() &&
!FirstTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondTTPD->hasDefaultArgument() &&
!SecondTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
QualType FirstType = FirstTTPD->getDefaultArgument();
QualType SecondType = SecondTTPD->getDefaultArgument();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstType;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondType;
ParameterMismatch = true;
break;
}
}
if (FirstTTPD->isParameterPack() !=
SecondTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1) << SecondTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
if (isa<TemplateTemplateParmDecl>(FirstParam) &&
isa<TemplateTemplateParmDecl>(SecondParam)) {
TemplateTemplateParmDecl *FirstTTPD =
cast<TemplateTemplateParmDecl>(FirstParam);
TemplateTemplateParmDecl *SecondTTPD =
cast<TemplateTemplateParmDecl>(SecondParam);
TemplateParameterList *FirstTPL =
FirstTTPD->getTemplateParameters();
TemplateParameterList *SecondTPL =
SecondTTPD->getTemplateParameters();
if (ComputeTemplateParameterListODRHash(FirstTPL) !=
ComputeTemplateParameterListODRHash(SecondTPL)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< FirstTemplate << (i + 1);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< SecondTemplate << (i + 1);
ParameterMismatch = true;
break;
}
bool HasFirstDefaultArgument =
FirstTTPD->hasDefaultArgument() &&
!FirstTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondTTPD->hasDefaultArgument() &&
!SecondTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
TemplateArgument FirstTA =
FirstTTPD->getDefaultArgument().getArgument();
TemplateArgument SecondTA =
SecondTTPD->getDefaultArgument().getArgument();
if (ComputeTemplateArgumentODRHash(FirstTA) !=
ComputeTemplateArgumentODRHash(SecondTA)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstTA;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondTA;
ParameterMismatch = true;
break;
}
}
if (FirstTTPD->isParameterPack() !=
SecondTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1) << SecondTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
if (isa<NonTypeTemplateParmDecl>(FirstParam) &&
isa<NonTypeTemplateParmDecl>(SecondParam)) {
NonTypeTemplateParmDecl *FirstNTTPD =
cast<NonTypeTemplateParmDecl>(FirstParam);
NonTypeTemplateParmDecl *SecondNTTPD =
cast<NonTypeTemplateParmDecl>(SecondParam);
QualType FirstType = FirstNTTPD->getType();
QualType SecondType = SecondNTTPD->getType();
if (ComputeQualTypeODRHash(FirstType) !=
ComputeQualTypeODRHash(SecondType)) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< FirstTemplate << (i + 1);
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentType)
<< SecondTemplate << (i + 1);
ParameterMismatch = true;
break;
}
bool HasFirstDefaultArgument =
FirstNTTPD->hasDefaultArgument() &&
!FirstNTTPD->defaultArgumentWasInherited();
bool HasSecondDefaultArgument =
SecondNTTPD->hasDefaultArgument() &&
!SecondNTTPD->defaultArgumentWasInherited();
if (HasFirstDefaultArgument != HasSecondDefaultArgument) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< FirstTemplate << (i + 1) << HasFirstDefaultArgument;
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterSingleDefaultArgument)
<< SecondTemplate << (i + 1) << HasSecondDefaultArgument;
ParameterMismatch = true;
break;
}
if (HasFirstDefaultArgument && HasSecondDefaultArgument) {
Expr *FirstDefaultArgument = FirstNTTPD->getDefaultArgument();
Expr *SecondDefaultArgument = SecondNTTPD->getDefaultArgument();
if (ComputeODRHash(FirstDefaultArgument) !=
ComputeODRHash(SecondDefaultArgument)) {
ODRDiagDeclError(
FirstRecord, FirstModule, FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< FirstTemplate << (i + 1) << FirstDefaultArgument;
ODRDiagDeclNote(
SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplateParameterDifferentDefaultArgument)
<< SecondTemplate << (i + 1) << SecondDefaultArgument;
ParameterMismatch = true;
break;
}
}
if (FirstNTTPD->isParameterPack() !=
SecondNTTPD->isParameterPack()) {
ODRDiagDeclError(FirstRecord, FirstModule,
FirstTemplate->getLocation(),
FirstTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< FirstTemplate << (i + 1) << FirstNTTPD->isParameterPack();
ODRDiagDeclNote(SecondModule, SecondTemplate->getLocation(),
SecondTemplate->getSourceRange(),
FunctionTemplatePackParameter)
<< SecondTemplate << (i + 1)
<< SecondNTTPD->isParameterPack();
ParameterMismatch = true;
break;
}
}
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
break;
}
}
if (Diagnosed)
continue;
Diag(FirstDecl->getLocation(),
diag::err_module_odr_violation_mismatch_decl_unknown)
<< FirstRecord << FirstModule.empty() << FirstModule << FirstDiffType
<< FirstDecl->getSourceRange();
Diag(SecondDecl->getLocation(),
diag::note_module_odr_violation_mismatch_decl_unknown)
<< SecondModule << FirstDiffType << SecondDecl->getSourceRange();
Diagnosed = true;
}
if (!Diagnosed) {
// All definitions are updates to the same declaration. This happens if a
// module instantiates the declaration of a class template specialization
// and two or more other modules instantiate its definition.
//
// FIXME: Indicate which modules had instantiations of this definition.
// FIXME: How can this even happen?
Diag(Merge.first->getLocation(),
diag::err_module_odr_violation_different_instantiations)
<< Merge.first;
}
}
// Issue ODR failures diagnostics for functions.
for (auto &Merge : FunctionOdrMergeFailures) {
enum ODRFunctionDifference {
ReturnType,
ParameterName,
ParameterType,
ParameterSingleDefaultArgument,
ParameterDifferentDefaultArgument,
FunctionBody,
};
FunctionDecl *FirstFunction = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstFunction);
bool Diagnosed = false;
for (auto &SecondFunction : Merge.second) {
if (FirstFunction == SecondFunction)
continue;
std::string SecondModule =
getOwningModuleNameForDiagnostic(SecondFunction);
auto ODRDiagError = [FirstFunction, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODRFunctionDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_function)
<< FirstFunction << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagNote = [&SecondModule, this](SourceLocation Loc,
SourceRange Range,
ODRFunctionDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_function)
<< SecondModule << Range << DiffType;
};
if (ComputeQualTypeODRHash(FirstFunction->getReturnType()) !=
ComputeQualTypeODRHash(SecondFunction->getReturnType())) {
ODRDiagError(FirstFunction->getReturnTypeSourceRange().getBegin(),
FirstFunction->getReturnTypeSourceRange(), ReturnType)
<< FirstFunction->getReturnType();
ODRDiagNote(SecondFunction->getReturnTypeSourceRange().getBegin(),
SecondFunction->getReturnTypeSourceRange(), ReturnType)
<< SecondFunction->getReturnType();
Diagnosed = true;
break;
}
assert(FirstFunction->param_size() == SecondFunction->param_size() &&
"Merged functions with different number of parameters");
auto ParamSize = FirstFunction->param_size();
bool ParameterMismatch = false;
for (unsigned I = 0; I < ParamSize; ++I) {
auto *FirstParam = FirstFunction->getParamDecl(I);
auto *SecondParam = SecondFunction->getParamDecl(I);
assert(getContext().hasSameType(FirstParam->getType(),
SecondParam->getType()) &&
"Merged function has different parameter types.");
if (FirstParam->getDeclName() != SecondParam->getDeclName()) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterName)
<< I + 1 << FirstParam->getDeclName();
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterName)
<< I + 1 << SecondParam->getDeclName();
ParameterMismatch = true;
break;
}
QualType FirstParamType = FirstParam->getType();
QualType SecondParamType = SecondParam->getType();
if (FirstParamType != SecondParamType &&
ComputeQualTypeODRHash(FirstParamType) !=
ComputeQualTypeODRHash(SecondParamType)) {
if (const DecayedType *ParamDecayedType =
FirstParamType->getAs<DecayedType>()) {
ODRDiagError(FirstParam->getLocation(),
FirstParam->getSourceRange(), ParameterType)
<< (I + 1) << FirstParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagError(FirstParam->getLocation(),
FirstParam->getSourceRange(), ParameterType)
<< (I + 1) << FirstParamType << false;
}
if (const DecayedType *ParamDecayedType =
SecondParamType->getAs<DecayedType>()) {
ODRDiagNote(SecondParam->getLocation(),
SecondParam->getSourceRange(), ParameterType)
<< (I + 1) << SecondParamType << true
<< ParamDecayedType->getOriginalType();
} else {
ODRDiagNote(SecondParam->getLocation(),
SecondParam->getSourceRange(), ParameterType)
<< (I + 1) << SecondParamType << false;
}
ParameterMismatch = true;
break;
}
const Expr *FirstInit = FirstParam->getInit();
const Expr *SecondInit = SecondParam->getInit();
if ((FirstInit == nullptr) != (SecondInit == nullptr)) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterSingleDefaultArgument)
<< (I + 1) << (FirstInit == nullptr)
<< (FirstInit ? FirstInit->getSourceRange() : SourceRange());
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterSingleDefaultArgument)
<< (I + 1) << (SecondInit == nullptr)
<< (SecondInit ? SecondInit->getSourceRange() : SourceRange());
ParameterMismatch = true;
break;
}
if (FirstInit && SecondInit &&
ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagError(FirstParam->getLocation(), FirstParam->getSourceRange(),
ParameterDifferentDefaultArgument)
<< (I + 1) << FirstInit->getSourceRange();
ODRDiagNote(SecondParam->getLocation(), SecondParam->getSourceRange(),
ParameterDifferentDefaultArgument)
<< (I + 1) << SecondInit->getSourceRange();
ParameterMismatch = true;
break;
}
assert(ComputeSubDeclODRHash(FirstParam) ==
ComputeSubDeclODRHash(SecondParam) &&
"Undiagnosed parameter difference.");
}
if (ParameterMismatch) {
Diagnosed = true;
break;
}
// If no error has been generated before now, assume the problem is in
// the body and generate a message.
ODRDiagError(FirstFunction->getLocation(),
FirstFunction->getSourceRange(), FunctionBody);
ODRDiagNote(SecondFunction->getLocation(),
SecondFunction->getSourceRange(), FunctionBody);
Diagnosed = true;
break;
}
(void)Diagnosed;
assert(Diagnosed && "Unable to emit ODR diagnostic.");
}
// Issue ODR failures diagnostics for enums.
for (auto &Merge : EnumOdrMergeFailures) {
enum ODREnumDifference {
SingleScopedEnum,
EnumTagKeywordMismatch,
SingleSpecifiedType,
DifferentSpecifiedTypes,
DifferentNumberEnumConstants,
EnumConstantName,
EnumConstantSingleInitilizer,
EnumConstantDifferentInitilizer,
};
// If we've already pointed out a specific problem with this enum, don't
// bother issuing a general "something's different" diagnostic.
if (!DiagnosedOdrMergeFailures.insert(Merge.first).second)
continue;
EnumDecl *FirstEnum = Merge.first;
std::string FirstModule = getOwningModuleNameForDiagnostic(FirstEnum);
using DeclHashes =
llvm::SmallVector<std::pair<EnumConstantDecl *, unsigned>, 4>;
auto PopulateHashes = [&ComputeSubDeclODRHash, FirstEnum](
DeclHashes &Hashes, EnumDecl *Enum) {
for (auto *D : Enum->decls()) {
// Due to decl merging, the first EnumDecl is the parent of
// Decls in both enums.
if (!ODRHash::isDeclToBeProcessed(D, FirstEnum))
continue;
assert(isa<EnumConstantDecl>(D) && "Unexpected Decl kind");
Hashes.emplace_back(cast<EnumConstantDecl>(D),
ComputeSubDeclODRHash(D));
}
};
DeclHashes FirstHashes;
PopulateHashes(FirstHashes, FirstEnum);
bool Diagnosed = false;
for (auto &SecondEnum : Merge.second) {
if (FirstEnum == SecondEnum)
continue;
std::string SecondModule =
getOwningModuleNameForDiagnostic(SecondEnum);
auto ODRDiagError = [FirstEnum, &FirstModule,
this](SourceLocation Loc, SourceRange Range,
ODREnumDifference DiffType) {
return Diag(Loc, diag::err_module_odr_violation_enum)
<< FirstEnum << FirstModule.empty() << FirstModule << Range
<< DiffType;
};
auto ODRDiagNote = [&SecondModule, this](SourceLocation Loc,
SourceRange Range,
ODREnumDifference DiffType) {
return Diag(Loc, diag::note_module_odr_violation_enum)
<< SecondModule << Range << DiffType;
};
if (FirstEnum->isScoped() != SecondEnum->isScoped()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
SingleScopedEnum)
<< FirstEnum->isScoped();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
SingleScopedEnum)
<< SecondEnum->isScoped();
Diagnosed = true;
continue;
}
if (FirstEnum->isScoped() && SecondEnum->isScoped()) {
if (FirstEnum->isScopedUsingClassTag() !=
SecondEnum->isScopedUsingClassTag()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
EnumTagKeywordMismatch)
<< FirstEnum->isScopedUsingClassTag();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
EnumTagKeywordMismatch)
<< SecondEnum->isScopedUsingClassTag();
Diagnosed = true;
continue;
}
}
QualType FirstUnderlyingType =
FirstEnum->getIntegerTypeSourceInfo()
? FirstEnum->getIntegerTypeSourceInfo()->getType()
: QualType();
QualType SecondUnderlyingType =
SecondEnum->getIntegerTypeSourceInfo()
? SecondEnum->getIntegerTypeSourceInfo()->getType()
: QualType();
if (FirstUnderlyingType.isNull() != SecondUnderlyingType.isNull()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
SingleSpecifiedType)
<< !FirstUnderlyingType.isNull();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
SingleSpecifiedType)
<< !SecondUnderlyingType.isNull();
Diagnosed = true;
continue;
}
if (!FirstUnderlyingType.isNull() && !SecondUnderlyingType.isNull()) {
if (ComputeQualTypeODRHash(FirstUnderlyingType) !=
ComputeQualTypeODRHash(SecondUnderlyingType)) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
DifferentSpecifiedTypes)
<< FirstUnderlyingType;
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
DifferentSpecifiedTypes)
<< SecondUnderlyingType;
Diagnosed = true;
continue;
}
}
DeclHashes SecondHashes;
PopulateHashes(SecondHashes, SecondEnum);
if (FirstHashes.size() != SecondHashes.size()) {
ODRDiagError(FirstEnum->getLocation(), FirstEnum->getSourceRange(),
DifferentNumberEnumConstants)
<< (int)FirstHashes.size();
ODRDiagNote(SecondEnum->getLocation(), SecondEnum->getSourceRange(),
DifferentNumberEnumConstants)
<< (int)SecondHashes.size();
Diagnosed = true;
continue;
}
for (unsigned I = 0; I < FirstHashes.size(); ++I) {
if (FirstHashes[I].second == SecondHashes[I].second)
continue;
const EnumConstantDecl *FirstEnumConstant = FirstHashes[I].first;
const EnumConstantDecl *SecondEnumConstant = SecondHashes[I].first;
if (FirstEnumConstant->getDeclName() !=
SecondEnumConstant->getDeclName()) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(), EnumConstantName)
<< I + 1 << FirstEnumConstant;
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(), EnumConstantName)
<< I + 1 << SecondEnumConstant;
Diagnosed = true;
break;
}
const Expr *FirstInit = FirstEnumConstant->getInitExpr();
const Expr *SecondInit = SecondEnumConstant->getInitExpr();
if (!FirstInit && !SecondInit)
continue;
if (!FirstInit || !SecondInit) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(),
EnumConstantSingleInitilizer)
<< I + 1 << FirstEnumConstant << (FirstInit != nullptr);
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(),
EnumConstantSingleInitilizer)
<< I + 1 << SecondEnumConstant << (SecondInit != nullptr);
Diagnosed = true;
break;
}
if (ComputeODRHash(FirstInit) != ComputeODRHash(SecondInit)) {
ODRDiagError(FirstEnumConstant->getLocation(),
FirstEnumConstant->getSourceRange(),
EnumConstantDifferentInitilizer)
<< I + 1 << FirstEnumConstant;
ODRDiagNote(SecondEnumConstant->getLocation(),
SecondEnumConstant->getSourceRange(),
EnumConstantDifferentInitilizer)
<< I + 1 << SecondEnumConstant;
Diagnosed = true;
break;
}
}
}
(void)Diagnosed;
assert(Diagnosed && "Unable to emit ODR diagnostic.");
}
}
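// Editor's note: illustrative sketch, not part of this diff. The ODR
// diagnostics above all follow one pattern: hash corresponding pieces of two
// definitions and, on the first mismatch, emit an error on the first
// definition plus a note on the second, then stop comparing. The stand-alone
// sketch below uses only standard C++; FieldView, hashField and
// reportOdrMismatch are hypothetical names, not clang APIs.
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

struct FieldView {
  std::string Name;
  std::string Type;
};

static size_t hashField(const FieldView &F) {
  // Stand-in for ODRHash: combine the hashes of the name and the type.
  return (std::hash<std::string>()(F.Name) * 31) ^
         std::hash<std::string>()(F.Type);
}

// Returns true if a mismatch was diagnosed.
static bool reportOdrMismatch(const std::vector<FieldView> &First,
                              const std::vector<FieldView> &Second) {
  if (First.size() != Second.size()) {
    std::printf("error: definitions have %zu vs %zu fields\n", First.size(),
                Second.size());
    return true;
  }
  for (size_t I = 0; I != First.size(); ++I) {
    if (hashField(First[I]) == hashField(Second[I]))
      continue;
    std::printf("error: field %zu ('%s') differs between definitions\n",
                I + 1, First[I].Name.c_str());
    std::printf("note: the other definition declares '%s'\n",
                Second[I].Name.c_str());
    return true; // Diagnose only the first difference, as the code above does.
  }
  return false;
}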
void ASTReader::StartedDeserializing() {
if (++NumCurrentElementsDeserializing == 1 && ReadTimer.get())
ReadTimer->startTimer();
}
void ASTReader::FinishedDeserializing() {
assert(NumCurrentElementsDeserializing &&
"FinishedDeserializing not paired with StartedDeserializing");
if (NumCurrentElementsDeserializing == 1) {
// We decrease NumCurrentElementsDeserializing only after pending actions
// are finished, to avoid recursively re-calling finishPendingActions().
finishPendingActions();
}
--NumCurrentElementsDeserializing;
if (NumCurrentElementsDeserializing == 0) {
// Propagate exception specification and deduced type updates along
// redeclaration chains.
//
// We do this now rather than in finishPendingActions because we want to
// be able to walk the complete redeclaration chains of the updated decls.
while (!PendingExceptionSpecUpdates.empty() ||
!PendingDeducedTypeUpdates.empty()) {
auto ESUpdates = std::move(PendingExceptionSpecUpdates);
PendingExceptionSpecUpdates.clear();
for (auto Update : ESUpdates) {
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
auto *FPT = Update.second->getType()->castAs<FunctionProtoType>();
auto ESI = FPT->getExtProtoInfo().ExceptionSpec;
if (auto *Listener = getContext().getASTMutationListener())
Listener->ResolvedExceptionSpec(cast<FunctionDecl>(Update.second));
for (auto *Redecl : Update.second->redecls())
getContext().adjustExceptionSpec(cast<FunctionDecl>(Redecl), ESI);
}
auto DTUpdates = std::move(PendingDeducedTypeUpdates);
PendingDeducedTypeUpdates.clear();
for (auto Update : DTUpdates) {
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
// FIXME: If the return type is already deduced, check that it matches.
getContext().adjustDeducedFunctionResultType(Update.first,
Update.second);
}
}
if (ReadTimer)
ReadTimer->stopTimer();
diagnoseOdrViolations();
// We are not in recursive loading, so it's safe to pass the "interesting"
// decls to the consumer.
if (Consumer)
PassInterestingDeclsToConsumer();
}
}
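// Editor's note: illustrative sketch, not part of this diff. It models the
// Started/FinishedDeserializing pairing above: a nesting counter defers
// pending work until the outermost deserialization is about to finish, so
// nothing is flushed while the reader is still recursively loading. The class
// and member names are hypothetical, not clang APIs.
#include <functional>
#include <utility>
#include <vector>

class DeserializationGuard {
  unsigned Depth = 0;
  std::vector<std::function<void()>> Pending;

public:
  void started() { ++Depth; }

  void addPending(std::function<void()> Fn) {
    Pending.push_back(std::move(Fn));
  }

  void finished() {
    if (Depth == 1) {
      // Flush before decrementing; loop because a pending action may queue
      // more work (compare the loop over pending updates in
      // FinishedDeserializing above).
      while (!Pending.empty()) {
        auto Work = std::move(Pending);
        Pending.clear();
        for (auto &Fn : Work)
          Fn();
      }
    }
    --Depth;
  }
};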
void ASTReader::pushExternalDeclIntoScope(NamedDecl *D, DeclarationName Name) {
if (IdentifierInfo *II = Name.getAsIdentifierInfo()) {
// Remove any fake results before adding any real ones.
auto It = PendingFakeLookupResults.find(II);
if (It != PendingFakeLookupResults.end()) {
for (auto *ND : It->second)
SemaObj->IdResolver.RemoveDecl(ND);
// FIXME: this works around a module+PCH performance issue.
// Rather than erasing the result from the map, which is O(n), just clear
// the vector of NamedDecls.
It->second.clear();
}
}
if (SemaObj->IdResolver.tryAddTopLevelDecl(D, Name) && SemaObj->TUScope) {
SemaObj->TUScope->AddDecl(D);
} else if (SemaObj->TUScope) {
// Adding the decl to IdResolver may have failed because it was already
// there (even though it was not added to the scope). If it is already in
// IdResolver, make sure it also gets added to the scope.
if (std::find(SemaObj->IdResolver.begin(Name),
SemaObj->IdResolver.end(), D) != SemaObj->IdResolver.end())
SemaObj->TUScope->AddDecl(D);
}
}
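// Editor's note: illustrative sketch, not part of this diff. It shows the
// "clear the bucket instead of erasing the key" trick used for
// PendingFakeLookupResults above: clearing the stored vector is cheap, and
// the now-empty entry can simply stay in the map. Names and container choice
// here are hypothetical; the real code uses LLVM containers.
#include <map>
#include <string>
#include <vector>

struct FakeLookupTable {
  std::map<std::string, std::vector<int>> Results;

  // Drop the fake results for Name without erasing the map entry itself.
  void dropFakeResults(const std::string &Name) {
    auto It = Results.find(Name);
    if (It == Results.end())
      return;
    It->second.clear(); // Leave the (now empty) key in place.
  }
};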
ASTReader::ASTReader(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
ASTContext *Context,
const PCHContainerReader &PCHContainerRdr,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
StringRef isysroot,
DisableValidationForModuleKind DisableValidationKind,
bool AllowASTWithCompilerErrors,
bool AllowConfigurationMismatch, bool ValidateSystemInputs,
bool ValidateASTInputFilesContent, bool UseGlobalIndex,
std::unique_ptr<llvm::Timer> ReadTimer)
: Listener(bool(DisableValidationKind &DisableValidationForModuleKind::PCH)
? cast<ASTReaderListener>(new SimpleASTReaderListener(PP))
: cast<ASTReaderListener>(new PCHValidator(PP, *this))),
SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()),
PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP),
ContextObj(Context), ModuleMgr(PP.getFileManager(), ModuleCache,
PCHContainerRdr, PP.getHeaderSearchInfo()),
DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
DisableValidationKind(DisableValidationKind),
AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
AllowConfigurationMismatch(AllowConfigurationMismatch),
ValidateSystemInputs(ValidateSystemInputs),
ValidateASTInputFilesContent(ValidateASTInputFilesContent),
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
SourceMgr.setExternalSLocEntrySource(this);
for (const auto &Ext : Extensions) {
auto BlockName = Ext->getExtensionMetadata().BlockName;
auto Known = ModuleFileExtensions.find(BlockName);
if (Known != ModuleFileExtensions.end()) {
Diags.Report(diag::warn_duplicate_module_file_extension)
<< BlockName;
continue;
}
ModuleFileExtensions.insert({BlockName, Ext});
}
}
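// Editor's note: illustrative sketch, not part of this diff. It mirrors the
// extension-registration loop in the ASTReader constructor above: extensions
// are keyed by their block name, the first registration wins, and duplicates
// are reported and skipped. The Extension type and function name below are
// hypothetical.
#include <cstdio>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Extension {
  std::string BlockName;
};

static void registerExtensions(
    const std::vector<std::shared_ptr<Extension>> &Extensions,
    std::map<std::string, std::shared_ptr<Extension>> &Registry) {
  for (const auto &Ext : Extensions) {
    const std::string &Name = Ext->BlockName;
    if (Registry.count(Name)) {
      std::fprintf(stderr, "warning: duplicate module file extension '%s'\n",
                   Name.c_str());
      continue; // Keep the first registration, skip the duplicate.
    }
    Registry.emplace(Name, Ext);
  }
}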
ASTReader::~ASTReader() {
if (OwnsDeserializationListener)
delete DeserializationListener;
}
IdentifierResolver &ASTReader::getIdResolver() {
return SemaObj ? SemaObj->IdResolver : DummyIdResolver;
}
Expected<unsigned> ASTRecordReader::readRecord(llvm::BitstreamCursor &Cursor,
unsigned AbbrevID) {
Idx = 0;
Record.clear();
return Cursor.readRecord(AbbrevID, Record);
}
//===----------------------------------------------------------------------===//
// OMPClauseReader implementation
//===----------------------------------------------------------------------===//
// This has to be in namespace clang because it's friended by all
// of the OMP clauses.
namespace clang {
class OMPClauseReader : public OMPClauseVisitor<OMPClauseReader> {
ASTRecordReader &Record;
ASTContext &Context;
public:
OMPClauseReader(ASTRecordReader &Record)
: Record(Record), Context(Record.getContext()) {}
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) void Visit##Class(Class *C);
#include "llvm/Frontend/OpenMP/OMP.inc"
OMPClause *readClause();
void VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C);
void VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C);
};
} // end namespace clang
OMPClause *ASTRecordReader::readOMPClause() {
return OMPClauseReader(*this).readClause();
}
OMPClause *OMPClauseReader::readClause() {
OMPClause *C = nullptr;
switch (llvm::omp::Clause(Record.readInt())) {
case llvm::omp::OMPC_if:
C = new (Context) OMPIfClause();
break;
case llvm::omp::OMPC_final:
C = new (Context) OMPFinalClause();
break;
case llvm::omp::OMPC_num_threads:
C = new (Context) OMPNumThreadsClause();
break;
case llvm::omp::OMPC_safelen:
C = new (Context) OMPSafelenClause();
break;
case llvm::omp::OMPC_simdlen:
C = new (Context) OMPSimdlenClause();
break;
case llvm::omp::OMPC_sizes: {
unsigned NumSizes = Record.readInt();
C = OMPSizesClause::CreateEmpty(Context, NumSizes);
break;
}
case llvm::omp::OMPC_full:
C = OMPFullClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_partial:
C = OMPPartialClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_allocator:
C = new (Context) OMPAllocatorClause();
break;
case llvm::omp::OMPC_collapse:
C = new (Context) OMPCollapseClause();
break;
case llvm::omp::OMPC_default:
C = new (Context) OMPDefaultClause();
break;
case llvm::omp::OMPC_proc_bind:
C = new (Context) OMPProcBindClause();
break;
case llvm::omp::OMPC_schedule:
C = new (Context) OMPScheduleClause();
break;
case llvm::omp::OMPC_ordered:
C = OMPOrderedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_nowait:
C = new (Context) OMPNowaitClause();
break;
case llvm::omp::OMPC_untied:
C = new (Context) OMPUntiedClause();
break;
case llvm::omp::OMPC_mergeable:
C = new (Context) OMPMergeableClause();
break;
case llvm::omp::OMPC_read:
C = new (Context) OMPReadClause();
break;
case llvm::omp::OMPC_write:
C = new (Context) OMPWriteClause();
break;
case llvm::omp::OMPC_update:
C = OMPUpdateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_capture:
C = new (Context) OMPCaptureClause();
break;
case llvm::omp::OMPC_seq_cst:
C = new (Context) OMPSeqCstClause();
break;
case llvm::omp::OMPC_acq_rel:
C = new (Context) OMPAcqRelClause();
break;
case llvm::omp::OMPC_acquire:
C = new (Context) OMPAcquireClause();
break;
case llvm::omp::OMPC_release:
C = new (Context) OMPReleaseClause();
break;
case llvm::omp::OMPC_relaxed:
C = new (Context) OMPRelaxedClause();
break;
case llvm::omp::OMPC_threads:
C = new (Context) OMPThreadsClause();
break;
case llvm::omp::OMPC_simd:
C = new (Context) OMPSIMDClause();
break;
case llvm::omp::OMPC_nogroup:
C = new (Context) OMPNogroupClause();
break;
case llvm::omp::OMPC_unified_address:
C = new (Context) OMPUnifiedAddressClause();
break;
case llvm::omp::OMPC_unified_shared_memory:
C = new (Context) OMPUnifiedSharedMemoryClause();
break;
case llvm::omp::OMPC_reverse_offload:
C = new (Context) OMPReverseOffloadClause();
break;
case llvm::omp::OMPC_dynamic_allocators:
C = new (Context) OMPDynamicAllocatorsClause();
break;
case llvm::omp::OMPC_atomic_default_mem_order:
C = new (Context) OMPAtomicDefaultMemOrderClause();
break;
case llvm::omp::OMPC_private:
C = OMPPrivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_firstprivate:
C = OMPFirstprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_lastprivate:
C = OMPLastprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_shared:
C = OMPSharedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_reduction: {
unsigned N = Record.readInt();
auto Modifier = Record.readEnum<OpenMPReductionClauseModifier>();
C = OMPReductionClause::CreateEmpty(Context, N, Modifier);
break;
}
case llvm::omp::OMPC_task_reduction:
C = OMPTaskReductionClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_in_reduction:
C = OMPInReductionClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_linear:
C = OMPLinearClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_aligned:
C = OMPAlignedClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_copyin:
C = OMPCopyinClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_copyprivate:
C = OMPCopyprivateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_flush:
C = OMPFlushClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_depobj:
C = OMPDepobjClause::CreateEmpty(Context);
break;
case llvm::omp::OMPC_depend: {
unsigned NumVars = Record.readInt();
unsigned NumLoops = Record.readInt();
C = OMPDependClause::CreateEmpty(Context, NumVars, NumLoops);
break;
}
case llvm::omp::OMPC_device:
C = new (Context) OMPDeviceClause();
break;
case llvm::omp::OMPC_map: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPMapClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_num_teams:
C = new (Context) OMPNumTeamsClause();
break;
case llvm::omp::OMPC_thread_limit:
C = new (Context) OMPThreadLimitClause();
break;
case llvm::omp::OMPC_priority:
C = new (Context) OMPPriorityClause();
break;
case llvm::omp::OMPC_grainsize:
C = new (Context) OMPGrainsizeClause();
break;
case llvm::omp::OMPC_num_tasks:
C = new (Context) OMPNumTasksClause();
break;
case llvm::omp::OMPC_hint:
C = new (Context) OMPHintClause();
break;
case llvm::omp::OMPC_dist_schedule:
C = new (Context) OMPDistScheduleClause();
break;
case llvm::omp::OMPC_defaultmap:
C = new (Context) OMPDefaultmapClause();
break;
case llvm::omp::OMPC_to: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPToClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_from: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPFromClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_use_device_ptr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPUseDevicePtrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_use_device_addr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPUseDeviceAddrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_is_device_ptr: {
OMPMappableExprListSizeTy Sizes;
Sizes.NumVars = Record.readInt();
Sizes.NumUniqueDeclarations = Record.readInt();
Sizes.NumComponentLists = Record.readInt();
Sizes.NumComponents = Record.readInt();
C = OMPIsDevicePtrClause::CreateEmpty(Context, Sizes);
break;
}
case llvm::omp::OMPC_allocate:
C = OMPAllocateClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_nontemporal:
C = OMPNontemporalClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_inclusive:
C = OMPInclusiveClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_exclusive:
C = OMPExclusiveClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_order:
C = new (Context) OMPOrderClause();
break;
case llvm::omp::OMPC_init:
C = OMPInitClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_use:
C = new (Context) OMPUseClause();
break;
case llvm::omp::OMPC_destroy:
C = new (Context) OMPDestroyClause();
break;
case llvm::omp::OMPC_novariants:
C = new (Context) OMPNovariantsClause();
break;
case llvm::omp::OMPC_nocontext:
C = new (Context) OMPNocontextClause();
break;
case llvm::omp::OMPC_detach:
C = new (Context) OMPDetachClause();
break;
case llvm::omp::OMPC_uses_allocators:
C = OMPUsesAllocatorsClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_affinity:
C = OMPAffinityClause::CreateEmpty(Context, Record.readInt());
break;
case llvm::omp::OMPC_filter:
C = new (Context) OMPFilterClause();
break;
#define OMP_CLAUSE_NO_CLASS(Enum, Str) \
case llvm::omp::Enum: \
break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
default:
break;
}
assert(C && "Unknown OMPClause type");
Visit(C);
C->setLocStart(Record.readSourceLocation());
C->setLocEnd(Record.readSourceLocation());
return C;
}
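// Editor's note: illustrative sketch, not part of this diff. readClause()
// above follows a two-step deserialization pattern: a switch on the clause
// kind allocates an empty node of the right dynamic type, then a visitor
// reads that node's fields from the record. The miniature version below uses
// hypothetical names (NodeKind, Reader) and only standard C++.
#include <cassert>
#include <memory>
#include <vector>

enum class NodeKind { A, B };

struct Node {
  NodeKind Kind;
  int Payload = 0;
  explicit Node(NodeKind K) : Kind(K) {}
};

struct Reader {
  std::vector<int> Record;
  size_t Idx = 0;

  int readInt() { return Record[Idx++]; }

  std::unique_ptr<Node> readNode() {
    std::unique_ptr<Node> N;
    switch (static_cast<NodeKind>(readInt())) { // 1) pick the dynamic type
    case NodeKind::A:
      N = std::make_unique<Node>(NodeKind::A);
      break;
    case NodeKind::B:
      N = std::make_unique<Node>(NodeKind::B);
      break;
    }
    assert(N && "unknown node kind");
    N->Payload = readInt();                     // 2) fill in the fields
    return N;
  }
};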
void OMPClauseReader::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) {
C->setPreInitStmt(Record.readSubStmt(),
static_cast<OpenMPDirectiveKind>(Record.readInt()));
}
void OMPClauseReader::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) {
VisitOMPClauseWithPreInit(C);
C->setPostUpdateExpr(Record.readSubExpr());
}
void OMPClauseReader::VisitOMPIfClause(OMPIfClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNameModifier(static_cast<OpenMPDirectiveKind>(Record.readInt()));
C->setNameModifierLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFinalClause(OMPFinalClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumThreads(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSafelenClause(OMPSafelenClause *C) {
C->setSafelen(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSimdlenClause(OMPSimdlenClause *C) {
C->setSimdlen(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
for (Expr *&E : C->getSizesRefs())
E = Record.readSubExpr();
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFullClause(OMPFullClause *C) {}
void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
C->setFactor(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
C->setAllocator(Record.readExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPCollapseClause(OMPCollapseClause *C) {
C->setNumForLoops(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) {
C->setDefaultKind(static_cast<llvm::omp::DefaultKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setDefaultKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) {
C->setProcBindKind(static_cast<llvm::omp::ProcBindKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setProcBindKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPScheduleClause(OMPScheduleClause *C) {
VisitOMPClauseWithPreInit(C);
C->setScheduleKind(
static_cast<OpenMPScheduleClauseKind>(Record.readInt()));
C->setFirstScheduleModifier(
static_cast<OpenMPScheduleClauseModifier>(Record.readInt()));
C->setSecondScheduleModifier(
static_cast<OpenMPScheduleClauseModifier>(Record.readInt()));
C->setChunkSize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setFirstScheduleModifierLoc(Record.readSourceLocation());
C->setSecondScheduleModifierLoc(Record.readSourceLocation());
C->setScheduleKindLoc(Record.readSourceLocation());
C->setCommaLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPOrderedClause(OMPOrderedClause *C) {
C->setNumForLoops(Record.readSubExpr());
for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
C->setLoopNumIterations(I, Record.readSubExpr());
for (unsigned I = 0, E = C->NumberOfLoops; I < E; ++I)
C->setLoopCounter(I, Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDetachClause(OMPDetachClause *C) {
C->setEventHandler(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNowaitClause(OMPNowaitClause *) {}
void OMPClauseReader::VisitOMPUntiedClause(OMPUntiedClause *) {}
void OMPClauseReader::VisitOMPMergeableClause(OMPMergeableClause *) {}
void OMPClauseReader::VisitOMPReadClause(OMPReadClause *) {}
void OMPClauseReader::VisitOMPWriteClause(OMPWriteClause *) {}
void OMPClauseReader::VisitOMPUpdateClause(OMPUpdateClause *C) {
if (C->isExtended()) {
C->setLParenLoc(Record.readSourceLocation());
C->setArgumentLoc(Record.readSourceLocation());
C->setDependencyKind(Record.readEnum<OpenMPDependClauseKind>());
}
}
void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {}
void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {}
void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {}
void OMPClauseReader::VisitOMPAcquireClause(OMPAcquireClause *) {}
void OMPClauseReader::VisitOMPReleaseClause(OMPReleaseClause *) {}
void OMPClauseReader::VisitOMPRelaxedClause(OMPRelaxedClause *) {}
void OMPClauseReader::VisitOMPThreadsClause(OMPThreadsClause *) {}
void OMPClauseReader::VisitOMPSIMDClause(OMPSIMDClause *) {}
void OMPClauseReader::VisitOMPNogroupClause(OMPNogroupClause *) {}
void OMPClauseReader::VisitOMPInitClause(OMPInitClause *C) {
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
C->setIsTarget(Record.readBool());
C->setIsTargetSync(Record.readBool());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPUseClause(OMPUseClause *C) {
C->setInteropVar(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDestroyClause(OMPDestroyClause *C) {
C->setInteropVar(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setVarLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNovariantsClause(OMPNovariantsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNocontextClause(OMPNocontextClause *C) {
VisitOMPClauseWithPreInit(C);
C->setCondition(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPUnifiedAddressClause(OMPUnifiedAddressClause *) {}
void OMPClauseReader::VisitOMPUnifiedSharedMemoryClause(
OMPUnifiedSharedMemoryClause *) {}
void OMPClauseReader::VisitOMPReverseOffloadClause(OMPReverseOffloadClause *) {}
void
OMPClauseReader::VisitOMPDynamicAllocatorsClause(OMPDynamicAllocatorsClause *) {
}
void OMPClauseReader::VisitOMPAtomicDefaultMemOrderClause(
OMPAtomicDefaultMemOrderClause *C) {
C->setAtomicDefaultMemOrderKind(
static_cast<OpenMPAtomicDefaultMemOrderClauseKind>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setAtomicDefaultMemOrderKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPPrivateClause(OMPPrivateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
}
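// Editor's note: illustrative sketch, not part of this diff. Many of the
// clause readers above repeat one idiom: reserve a vector of varlist_size()
// elements, read that many sub-expressions, pass the vector to a setter,
// then clear it and repeat for the next list. A generic helper for that
// idiom could look like the sketch below; readListInto is a hypothetical
// name, not a clang API.
#include <functional>
#include <vector>

template <typename T>
static std::vector<T> readListInto(unsigned N,
                                   const std::function<T()> &ReadOne) {
  std::vector<T> Items;
  Items.reserve(N);
  for (unsigned I = 0; I != N; ++I)
    Items.push_back(ReadOne());
  return Items;
}

// Hypothetical usage against the readers above:
//   C->setVarRefs(readListInto<Expr *>(NumVars,
//                                      [&] { return Record.readSubExpr(); }));
//   C->setPrivateCopies(readListInto<Expr *>(NumVars,
//                                            [&] { return Record.readSubExpr(); }));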
void OMPClauseReader::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) {
VisitOMPClauseWithPreInit(C);
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
}
void OMPClauseReader::VisitOMPLastprivateClause(OMPLastprivateClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setKind(Record.readEnum<OpenMPLastprivateModifier>());
C->setKindLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setSourceExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setDestinationExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setAssignmentOps(Vars);
}
void OMPClauseReader::VisitOMPSharedClause(OMPSharedClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setModifierLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
if (C->getModifier() == OMPC_REDUCTION_inscan) {
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyOps(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyArrayTemps(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInscanCopyArrayElems(Vars);
}
}
void OMPClauseReader::VisitOMPTaskReductionClause(OMPTaskReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
}
void OMPClauseReader::VisitOMPInReductionClause(OMPInReductionClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
NestedNameSpecifierLoc NNSL = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DNI = Record.readDeclarationNameInfo();
C->setQualifierLoc(NNSL);
C->setNameInfo(DNI);
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setLHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setRHSExprs(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setReductionOps(Vars);
Vars.clear();
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setTaskgroupDescriptors(Vars);
}
void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {
VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setModifier(static_cast<OpenMPLinearClauseKind>(Record.readInt()));
C->setModifierLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setUpdates(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setFinals(Vars);
C->setStep(Record.readSubExpr());
C->setCalcStep(Record.readSubExpr());
Vars.clear();
for (unsigned I = 0; I != NumVars + 1; ++I)
Vars.push_back(Record.readSubExpr());
C->setUsedExprs(Vars);
}
void OMPClauseReader::VisitOMPAlignedClause(OMPAlignedClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
C->setAlignment(Record.readSubExpr());
}
void OMPClauseReader::VisitOMPCopyinClause(OMPCopyinClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Exprs;
Exprs.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setVarRefs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setSourceExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setDestinationExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setAssignmentOps(Exprs);
}
void OMPClauseReader::VisitOMPCopyprivateClause(OMPCopyprivateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Exprs;
Exprs.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setVarRefs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setSourceExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setDestinationExprs(Exprs);
Exprs.clear();
for (unsigned i = 0; i != NumVars; ++i)
Exprs.push_back(Record.readSubExpr());
C->setAssignmentOps(Exprs);
}
void OMPClauseReader::VisitOMPFlushClause(OMPFlushClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPDepobjClause(OMPDepobjClause *C) {
C->setDepobj(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setModifier(Record.readSubExpr());
C->setDependencyKind(
static_cast<OpenMPDependClauseKind>(Record.readInt()));
C->setDependencyLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned I = 0; I != NumVars; ++I)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I)
C->setLoopData(I, Record.readSubExpr());
}
void OMPClauseReader::VisitOMPDeviceClause(OMPDeviceClause *C) {
VisitOMPClauseWithPreInit(C);
C->setModifier(Record.readEnum<OpenMPDeviceClauseModifier>());
C->setDevice(Record.readSubExpr());
C->setModifierLoc(Record.readSourceLocation());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPMapClause(OMPMapClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMapClauseModifiers; ++I) {
C->setMapTypeModifier(
I, static_cast<OpenMPMapModifierKind>(Record.readInt()));
C->setMapTypeModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setMapType(
static_cast<OpenMPMapClauseKind>(Record.readInt()));
C->setMapLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
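// Editor's note: illustrative sketch, not part of this diff. The mappable-
// expression clauses above (map, to, from, use_device_ptr, ...) serialize
// their component lists flattened: a vector of per-list sizes followed by all
// components in order. The sketch below rebuilds the nested structure from
// that flat layout; names are hypothetical and the component type is
// simplified to int.
#include <vector>

static std::vector<std::vector<int>>
rebuildLists(const std::vector<unsigned> &ListSizes,
             const std::vector<int> &FlatComponents) {
  std::vector<std::vector<int>> Lists;
  Lists.reserve(ListSizes.size());
  size_t Pos = 0;
  for (unsigned Size : ListSizes) {
    // Each list owns the next Size components of the flat array.
    Lists.emplace_back(FlatComponents.begin() + Pos,
                       FlatComponents.begin() + Pos + Size);
    Pos += Size;
  }
  return Lists;
}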
void OMPClauseReader::VisitOMPAllocateClause(OMPAllocateClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setColonLoc(Record.readSourceLocation());
C->setAllocator(Record.readSubExpr());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumTeams(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) {
VisitOMPClauseWithPreInit(C);
C->setThreadLimit(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPPriorityClause(OMPPriorityClause *C) {
VisitOMPClauseWithPreInit(C);
C->setPriority(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPGrainsizeClause(OMPGrainsizeClause *C) {
VisitOMPClauseWithPreInit(C);
C->setGrainsize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPNumTasksClause(OMPNumTasksClause *C) {
VisitOMPClauseWithPreInit(C);
C->setNumTasks(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPHintClause(OMPHintClause *C) {
C->setHint(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDistScheduleClause(OMPDistScheduleClause *C) {
VisitOMPClauseWithPreInit(C);
C->setDistScheduleKind(
static_cast<OpenMPDistScheduleClauseKind>(Record.readInt()));
C->setChunkSize(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
C->setDistScheduleKindLoc(Record.readSourceLocation());
C->setCommaLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPDefaultmapClause(OMPDefaultmapClause *C) {
C->setDefaultmapKind(
static_cast<OpenMPDefaultmapClauseKind>(Record.readInt()));
C->setDefaultmapModifier(
static_cast<OpenMPDefaultmapClauseModifier>(Record.readInt()));
C->setLParenLoc(Record.readSourceLocation());
C->setDefaultmapModifierLoc(Record.readSourceLocation());
C->setDefaultmapKindLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPToClause(OMPToClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) {
C->setMotionModifier(
I, static_cast<OpenMPMotionModifierKind>(Record.readInt()));
C->setMotionModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readSubExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readSubExpr();
bool IsNonContiguous = Record.readBool();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl, IsNonContiguous);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPFromClause(OMPFromClause *C) {
C->setLParenLoc(Record.readSourceLocation());
for (unsigned I = 0; I < NumberOfOMPMotionModifiers; ++I) {
C->setMotionModifier(
I, static_cast<OpenMPMotionModifierKind>(Record.readInt()));
C->setMotionModifierLoc(I, Record.readSourceLocation());
}
C->setMapperQualifierLoc(Record.readNestedNameSpecifierLoc());
C->setMapperIdInfo(Record.readDeclarationNameInfo());
C->setColonLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<Expr *, 16> UDMappers;
UDMappers.reserve(NumVars);
for (unsigned I = 0; I < NumVars; ++I)
UDMappers.push_back(Record.readSubExpr());
C->setUDMapperRefs(UDMappers);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExprPr = Record.readSubExpr();
bool IsNonContiguous = Record.readBool();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl, IsNonContiguous);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateCopies(Vars);
Vars.clear();
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setInits(Vars);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
auto *AssociatedExprPr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExprPr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPUseDeviceAddrClause(OMPUseDeviceAddrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExpr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExpr, AssociatedDecl,
/*IsNonContiguous*/ false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
C->setLParenLoc(Record.readSourceLocation());
auto NumVars = C->varlist_size();
auto UniqueDecls = C->getUniqueDeclarationsNum();
auto TotalLists = C->getTotalComponentListNum();
auto TotalComponents = C->getTotalComponentsNum();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
SmallVector<ValueDecl *, 16> Decls;
Decls.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
Decls.push_back(Record.readDeclAs<ValueDecl>());
C->setUniqueDecls(Decls);
SmallVector<unsigned, 16> ListsPerDecl;
ListsPerDecl.reserve(UniqueDecls);
for (unsigned i = 0; i < UniqueDecls; ++i)
ListsPerDecl.push_back(Record.readInt());
C->setDeclNumLists(ListsPerDecl);
SmallVector<unsigned, 32> ListSizes;
ListSizes.reserve(TotalLists);
for (unsigned i = 0; i < TotalLists; ++i)
ListSizes.push_back(Record.readInt());
C->setComponentListSizes(ListSizes);
SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
Components.reserve(TotalComponents);
for (unsigned i = 0; i < TotalComponents; ++i) {
Expr *AssociatedExpr = Record.readSubExpr();
auto *AssociatedDecl = Record.readDeclAs<ValueDecl>();
Components.emplace_back(AssociatedExpr, AssociatedDecl,
/*IsNonContiguous=*/false);
}
C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPNontemporalClause(OMPNontemporalClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
Vars.clear();
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setPrivateRefs(Vars);
}
void OMPClauseReader::VisitOMPInclusiveClause(OMPInclusiveClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPExclusiveClause(OMPExclusiveClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Record.readSubExpr());
C->setVarRefs(Vars);
}
void OMPClauseReader::VisitOMPUsesAllocatorsClause(OMPUsesAllocatorsClause *C) {
C->setLParenLoc(Record.readSourceLocation());
unsigned NumOfAllocators = C->getNumberOfAllocators();
SmallVector<OMPUsesAllocatorsClause::Data, 4> Data;
Data.reserve(NumOfAllocators);
for (unsigned I = 0; I != NumOfAllocators; ++I) {
OMPUsesAllocatorsClause::Data &D = Data.emplace_back();
D.Allocator = Record.readSubExpr();
D.AllocatorTraits = Record.readSubExpr();
D.LParenLoc = Record.readSourceLocation();
D.RParenLoc = Record.readSourceLocation();
}
C->setAllocatorsData(Data);
}
void OMPClauseReader::VisitOMPAffinityClause(OMPAffinityClause *C) {
C->setLParenLoc(Record.readSourceLocation());
C->setModifier(Record.readSubExpr());
C->setColonLoc(Record.readSourceLocation());
unsigned NumOfLocators = C->varlist_size();
SmallVector<Expr *, 4> Locators;
Locators.reserve(NumOfLocators);
for (unsigned I = 0; I != NumOfLocators; ++I)
Locators.push_back(Record.readSubExpr());
C->setVarRefs(Locators);
}
void OMPClauseReader::VisitOMPOrderClause(OMPOrderClause *C) {
C->setKind(Record.readEnum<OpenMPOrderClauseKind>());
C->setLParenLoc(Record.readSourceLocation());
C->setKindKwLoc(Record.readSourceLocation());
}
void OMPClauseReader::VisitOMPFilterClause(OMPFilterClause *C) {
VisitOMPClauseWithPreInit(C);
C->setThreadID(Record.readSubExpr());
C->setLParenLoc(Record.readSourceLocation());
}
OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
TI.Sets.resize(readUInt32());
for (auto &Set : TI.Sets) {
Set.Kind = readEnum<llvm::omp::TraitSet>();
Set.Selectors.resize(readUInt32());
for (auto &Selector : Set.Selectors) {
Selector.Kind = readEnum<llvm::omp::TraitSelector>();
Selector.ScoreOrCondition = nullptr;
if (readBool())
Selector.ScoreOrCondition = readExprRef();
Selector.Properties.resize(readUInt32());
for (auto &Property : Selector.Properties)
Property.Kind = readEnum<llvm::omp::TraitProperty>();
}
}
return &TI;
}
void ASTRecordReader::readOMPChildren(OMPChildren *Data) {
if (!Data)
return;
if (Reader->ReadingKind == ASTReader::Read_Stmt) {
// Skip NumClauses, NumChildren and HasAssociatedStmt fields.
skipInts(3);
}
SmallVector<OMPClause *, 4> Clauses(Data->getNumClauses());
for (unsigned I = 0, E = Data->getNumClauses(); I < E; ++I)
Clauses[I] = readOMPClause();
Data->setClauses(Clauses);
if (Data->hasAssociatedStmt())
Data->setAssociatedStmt(readStmt());
for (unsigned I = 0, E = Data->getNumChildren(); I < E; ++I)
Data->getChildren()[I] = readStmt();
}
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
index 518447e3e422..2e91f16a2158 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -1,1208 +1,1215 @@
/*===- InstrProfilingFile.c - Write instrumentation to a file -------------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
#if !defined(__Fuchsia__)
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
/* For _alloca. */
#include <malloc.h>
#endif
#if defined(_WIN32)
#include "WindowsMMap.h"
/* For _chsize_s */
#include <io.h>
#include <process.h>
#else
#include <sys/file.h>
#include <sys/mman.h>
#include <unistd.h>
#if defined(__linux__)
#include <sys/types.h>
#endif
#endif
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingPort.h"
#include "InstrProfilingUtil.h"
/* Specifies where the profile name comes from.
* The order of the enumerators defines their
* precedence; reordering them may change
* runtime behavior. */
typedef enum ProfileNameSpecifier {
PNS_unknown = 0,
PNS_default,
PNS_command_line,
PNS_environment,
PNS_runtime_api
} ProfileNameSpecifier;
static const char *getPNSStr(ProfileNameSpecifier PNS) {
switch (PNS) {
case PNS_default:
return "default setting";
case PNS_command_line:
return "command line";
case PNS_environment:
return "environment variable";
case PNS_runtime_api:
return "runtime API";
default:
return "Unknown";
}
}
#define MAX_PID_SIZE 16
/* Data structure holding the result of parsed filename pattern. */
typedef struct lprofFilename {
/* File name string possibly with %p or %h specifiers. */
const char *FilenamePat;
/* A flag indicating if FilenamePat's memory is allocated
* by runtime. */
unsigned OwnsFilenamePat;
const char *ProfilePathPrefix;
char PidChars[MAX_PID_SIZE];
char *TmpDir;
char Hostname[COMPILER_RT_MAX_HOSTLEN];
unsigned NumPids;
unsigned NumHosts;
/* When in-process merging is enabled, this parameter specifies
* the total number of profile data files shared by all the processes
* spawned from the same binary. By default the value is 1. If merging
* is not enabled, its value should be 0. This parameter is specified
* by the %[0-9]m specifier. For instance %2m enables merging using
* 2 profile data files. %1m is equivalent to %m. The %m specifier
* may appear only once, at the end of the name pattern. */
unsigned MergePoolSize;
ProfileNameSpecifier PNS;
} lprofFilename;
static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,
{0}, 0, 0, 0, PNS_unknown};
static int ProfileMergeRequested = 0;
static int isProfileMergeRequested() { return ProfileMergeRequested; }
static void setProfileMergeRequested(int EnableMerge) {
ProfileMergeRequested = EnableMerge;
}
static FILE *ProfileFile = NULL;
static FILE *getProfileFile() { return ProfileFile; }
static void setProfileFile(FILE *File) { ProfileFile = File; }
COMPILER_RT_VISIBILITY void __llvm_profile_set_file_object(FILE *File,
int EnableMerge) {
if (__llvm_profile_is_continuous_mode_enabled()) {
PROF_WARN("__llvm_profile_set_file_object(fd=%d) not supported, because "
"continuous sync mode (%%c) is enabled",
fileno(File));
return;
}
setProfileFile(File);
setProfileMergeRequested(EnableMerge);
}
static int getCurFilenameLength();
static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf);
static unsigned doMerging() {
return lprofCurFilename.MergePoolSize || isProfileMergeRequested();
}
/* Return 1 if there is an error, otherwise return 0. */
static uint32_t fileWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
uint32_t NumIOVecs) {
uint32_t I;
FILE *File = (FILE *)This->WriterCtx;
char Zeroes[sizeof(uint64_t)] = {0};
for (I = 0; I < NumIOVecs; I++) {
if (IOVecs[I].Data) {
if (fwrite(IOVecs[I].Data, IOVecs[I].ElmSize, IOVecs[I].NumElm, File) !=
IOVecs[I].NumElm)
return 1;
} else if (IOVecs[I].UseZeroPadding) {
size_t BytesToWrite = IOVecs[I].ElmSize * IOVecs[I].NumElm;
while (BytesToWrite > 0) {
size_t PartialWriteLen =
(sizeof(uint64_t) > BytesToWrite) ? BytesToWrite : sizeof(uint64_t);
if (fwrite(Zeroes, sizeof(uint8_t), PartialWriteLen, File) !=
PartialWriteLen) {
return 1;
}
BytesToWrite -= PartialWriteLen;
}
} else {
if (fseek(File, IOVecs[I].ElmSize * IOVecs[I].NumElm, SEEK_CUR) == -1)
return 1;
}
}
return 0;
}
/* TODO: make the buffer size controllable by an internal option, and let the
compiler pass the size to the runtime via a variable. */
static uint32_t orderFileWriter(FILE *File, const uint32_t *DataStart) {
if (fwrite(DataStart, sizeof(uint32_t), INSTR_ORDER_FILE_BUFFER_SIZE, File) !=
INSTR_ORDER_FILE_BUFFER_SIZE)
return 1;
return 0;
}
static void initFileWriter(ProfDataWriter *This, FILE *File) {
This->Write = fileWriter;
This->WriterCtx = File;
}
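/* Illustrative sketch, not part of the upstream diff: lprofWriteData() only
 * needs a ProfDataWriter whose Write callback consumes ProfDataIOVec entries,
 * so alternative sinks can follow the same conventions as fileWriter and
 * initFileWriter above. The hypothetical writer below merely counts the bytes
 * it is asked to emit. */
typedef struct lprofByteCountCtx {
  uint64_t TotalBytes;
} lprofByteCountCtx;
static uint32_t byteCountWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
                                uint32_t NumIOVecs) {
  lprofByteCountCtx *Ctx = (lprofByteCountCtx *)This->WriterCtx;
  uint32_t I;
  for (I = 0; I < NumIOVecs; I++)
    /* Data payloads, zero padding and skipped regions all advance the output
     * by ElmSize * NumElm bytes. */
    Ctx->TotalBytes += IOVecs[I].ElmSize * IOVecs[I].NumElm;
  return 0; /* Zero signals success, matching fileWriter's convention. */
}
static void initByteCountWriter(ProfDataWriter *This, lprofByteCountCtx *Ctx) {
  This->Write = byteCountWriter;
  This->WriterCtx = Ctx;
}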
COMPILER_RT_VISIBILITY ProfBufferIO *
lprofCreateBufferIOInternal(void *File, uint32_t BufferSz) {
FreeHook = &free;
DynamicBufferIOBuffer = (uint8_t *)calloc(BufferSz, 1);
VPBufferSize = BufferSz;
ProfDataWriter *fileWriter =
(ProfDataWriter *)calloc(sizeof(ProfDataWriter), 1);
initFileWriter(fileWriter, File);
ProfBufferIO *IO = lprofCreateBufferIO(fileWriter);
IO->OwnFileWriter = 1;
return IO;
}
static void setupIOBuffer() {
const char *BufferSzStr = 0;
BufferSzStr = getenv("LLVM_VP_BUFFER_SIZE");
if (BufferSzStr && BufferSzStr[0]) {
VPBufferSize = atoi(BufferSzStr);
DynamicBufferIOBuffer = (uint8_t *)calloc(VPBufferSize, 1);
}
}
/* Get the size of the profile file. If there are any errors, print the
* message under the assumption that the profile is being read for merging
* purposes, and return -1. Otherwise return the file size in the inout param
* \p ProfileFileSize. */
static int getProfileFileSizeForMerging(FILE *ProfileFile,
uint64_t *ProfileFileSize) {
if (fseek(ProfileFile, 0L, SEEK_END) == -1) {
PROF_ERR("Unable to merge profile data, unable to get size: %s\n",
strerror(errno));
return -1;
}
*ProfileFileSize = ftell(ProfileFile);
/* Restore file offset. */
if (fseek(ProfileFile, 0L, SEEK_SET) == -1) {
PROF_ERR("Unable to merge profile data, unable to rewind: %s\n",
strerror(errno));
return -1;
}
if (*ProfileFileSize > 0 &&
*ProfileFileSize < sizeof(__llvm_profile_header)) {
PROF_WARN("Unable to merge profile data: %s\n",
"source profile file is too small.");
return -1;
}
return 0;
}
/* mmap() \p ProfileFile for profile merging purposes, assuming that an
* exclusive lock is held on the file and that \p ProfileFileSize is the
* length of the file. Return the mmap'd buffer in the inout variable
* \p ProfileBuffer. Returns -1 on failure. On success, the caller is
* responsible for unmapping the mmap'd buffer in \p ProfileBuffer. */
static int mmapProfileForMerging(FILE *ProfileFile, uint64_t ProfileFileSize,
char **ProfileBuffer) {
*ProfileBuffer = mmap(NULL, ProfileFileSize, PROT_READ, MAP_SHARED | MAP_FILE,
fileno(ProfileFile), 0);
if (*ProfileBuffer == MAP_FAILED) {
PROF_ERR("Unable to merge profile data, mmap failed: %s\n",
strerror(errno));
return -1;
}
if (__llvm_profile_check_compatibility(*ProfileBuffer, ProfileFileSize)) {
(void)munmap(*ProfileBuffer, ProfileFileSize);
PROF_WARN("Unable to merge profile data: %s\n",
"source profile file is not compatible.");
return -1;
}
return 0;
}
/* Read profile data in \c ProfileFile and merge with in-memory
profile counters. Returns -1 if there is a fatal error, otherwise
0 is returned. Returning 0 does not mean the merge was actually
performed; if it was, *MergeDone is set to 1.
*/
static int doProfileMerging(FILE *ProfileFile, int *MergeDone) {
uint64_t ProfileFileSize;
char *ProfileBuffer;
/* Get the size of the profile on disk. */
if (getProfileFileSizeForMerging(ProfileFile, &ProfileFileSize) == -1)
return -1;
/* Nothing to merge. */
if (!ProfileFileSize)
return 0;
/* mmap() the profile and check that it is compatible with the data in
* the current image. */
if (mmapProfileForMerging(ProfileFile, ProfileFileSize, &ProfileBuffer) == -1)
return -1;
/* Now start merging */
if (__llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize)) {
PROF_ERR("%s\n", "Invalid profile data to merge");
(void)munmap(ProfileBuffer, ProfileFileSize);
return -1;
}
// Truncate the file in case value profile merging did not happen, to
// avoid leaving garbage data at the end of the profile file.
(void)COMPILER_RT_FTRUNCATE(ProfileFile,
__llvm_profile_get_size_for_buffer());
(void)munmap(ProfileBuffer, ProfileFileSize);
*MergeDone = 1;
return 0;
}
/* Create the directory holding the file, if needed. */
static void createProfileDir(const char *Filename) {
size_t Length = strlen(Filename);
if (lprofFindFirstDirSeparator(Filename)) {
char *Copy = (char *)COMPILER_RT_ALLOCA(Length + 1);
strncpy(Copy, Filename, Length + 1);
__llvm_profile_recursive_mkdir(Copy);
}
}
/* Open the profile data for merging. It opens the file in r+b mode with
* file locking. If the file has content which is compatible with the
* current process, it also reads the profile data in the file and merges
* it with the in-memory counters. After the profile data is merged in memory,
* the original profile data is truncated and ready for the profile
* dumper. With profile merging enabled, each executable as well as any of
* its instrumented shared libraries dumps profile data into its own data file.
*/
static FILE *openFileForMerging(const char *ProfileFileName, int *MergeDone) {
FILE *ProfileFile = NULL;
int rc;
ProfileFile = getProfileFile();
if (ProfileFile) {
lprofLockFileHandle(ProfileFile);
} else {
createProfileDir(ProfileFileName);
ProfileFile = lprofOpenFileEx(ProfileFileName);
}
if (!ProfileFile)
return NULL;
rc = doProfileMerging(ProfileFile, MergeDone);
if (rc || (!*MergeDone && COMPILER_RT_FTRUNCATE(ProfileFile, 0L)) ||
fseek(ProfileFile, 0L, SEEK_SET) == -1) {
PROF_ERR("Profile Merging of file %s failed: %s\n", ProfileFileName,
strerror(errno));
fclose(ProfileFile);
return NULL;
}
return ProfileFile;
}
static FILE *getFileObject(const char *OutputName) {
FILE *File;
File = getProfileFile();
if (File != NULL) {
return File;
}
return fopen(OutputName, "ab");
}
/* Write profile data to file \c OutputName. */
static int writeFile(const char *OutputName) {
int RetVal;
FILE *OutputFile;
int MergeDone = 0;
VPMergeHook = &lprofMergeValueProfData;
if (doMerging())
OutputFile = openFileForMerging(OutputName, &MergeDone);
else
OutputFile = getFileObject(OutputName);
if (!OutputFile)
return -1;
FreeHook = &free;
setupIOBuffer();
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
RetVal = lprofWriteData(&fileWriter, lprofGetVPDataReader(), MergeDone);
if (OutputFile == getProfileFile()) {
fflush(OutputFile);
if (doMerging()) {
lprofUnlockFileHandle(OutputFile);
}
} else {
fclose(OutputFile);
}
return RetVal;
}
/* Write order data to file \c OutputName. */
static int writeOrderFile(const char *OutputName) {
int RetVal;
FILE *OutputFile;
OutputFile = fopen(OutputName, "w");
if (!OutputFile) {
PROF_WARN("can't open file with mode ab: %s\n", OutputName);
return -1;
}
FreeHook = &free;
setupIOBuffer();
const uint32_t *DataBegin = __llvm_profile_begin_orderfile();
RetVal = orderFileWriter(OutputFile, DataBegin);
fclose(OutputFile);
return RetVal;
}
#define LPROF_INIT_ONCE_ENV "__LLVM_PROFILE_RT_INIT_ONCE"
static void truncateCurrentFile(void) {
const char *Filename;
char *FilenameBuf;
FILE *File;
int Length;
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
/* Only create the profile directory and truncate an existing profile once.
* In continuous mode, this is necessary, as the profile is written-to by the
* runtime initializer. */
int initialized = getenv(LPROF_INIT_ONCE_ENV) != NULL;
if (initialized)
return;
#if defined(_WIN32)
_putenv(LPROF_INIT_ONCE_ENV "=" LPROF_INIT_ONCE_ENV);
#else
setenv(LPROF_INIT_ONCE_ENV, LPROF_INIT_ONCE_ENV, 1);
#endif
/* Create the profile dir (even if online merging is enabled), so that
* the profile file can be set up if continuous mode is enabled. */
createProfileDir(Filename);
/* Bypass file truncation to allow online raw profile merging. */
if (lprofCurFilename.MergePoolSize)
return;
/* Truncate the file. Later we'll reopen and append. */
File = fopen(Filename, "w");
if (!File)
return;
fclose(File);
}
// TODO: Move these functions into InstrProfilingPlatform* files.
#if defined(__APPLE__)
static void assertIsZero(int *i) {
if (*i)
PROF_WARN("Expected flag to be 0, but got: %d\n", *i);
}
/* Write a partial profile to \p Filename, which is required to be backed by
* the open file object \p File. */
static int writeProfileWithFileObject(const char *Filename, FILE *File) {
setProfileFile(File);
int rc = writeFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
setProfileFile(NULL);
return rc;
}
/* Unlock the profile \p File and clear the unlock flag. */
static void unlockProfile(int *ProfileRequiresUnlock, FILE *File) {
if (!*ProfileRequiresUnlock) {
PROF_WARN("%s", "Expected to require profile unlock\n");
}
lprofUnlockFileHandle(File);
*ProfileRequiresUnlock = 0;
}
static void initializeProfileForContinuousMode(void) {
if (!__llvm_profile_is_continuous_mode_enabled())
return;
/* Get the sizes of various profile data sections. Taken from
* __llvm_profile_get_size_for_buffer(). */
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
const char *NamesBegin = __llvm_profile_begin_names();
const char *NamesEnd = __llvm_profile_end_names();
const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
uint64_t CountersSize = CountersEnd - CountersBegin;
/* Check that the counter and data sections in this image are page-aligned. */
unsigned PageSize = getpagesize();
if ((intptr_t)CountersBegin % PageSize != 0) {
PROF_ERR("Counters section not page-aligned (start = %p, pagesz = %u).\n",
CountersBegin, PageSize);
return;
}
if ((intptr_t)DataBegin % PageSize != 0) {
PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n",
DataBegin, PageSize);
return;
}
int Length = getCurFilenameLength();
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
const char *Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
FILE *File = NULL;
off_t CurrentFileOffset = 0;
off_t OffsetModPage = 0;
/* Whether an exclusive lock on the profile must be dropped after init.
* Use a cleanup to warn if the unlock does not occur. */
COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresUnlock = 0;
if (!doMerging()) {
/* We are not merging profiles, so open the raw profile in append mode. */
File = fopen(Filename, "a+b");
if (!File)
return;
/* Check that the offset within the file is page-aligned. */
CurrentFileOffset = ftello(File);
OffsetModPage = CurrentFileOffset % PageSize;
if (OffsetModPage != 0) {
PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
"page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
(uint64_t)CurrentFileOffset, PageSize);
return;
}
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
* the file should stay open. */
if (writeProfileWithFileObject(Filename, File) != 0)
return;
} else {
/* We are merging profiles. Map the counter section as shared memory into
* the profile, i.e. into each participating process. An increment in one
* process should be visible to every other process with the same counter
* section mapped. */
File = lprofOpenFileEx(Filename);
if (!File)
return;
ProfileRequiresUnlock = 1;
uint64_t ProfileFileSize;
if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1)
return unlockProfile(&ProfileRequiresUnlock, File);
if (ProfileFileSize == 0) {
/* Grow the profile so that mmap() can succeed. Leak the file handle, as
* the file should stay open. */
if (writeProfileWithFileObject(Filename, File) != 0)
return unlockProfile(&ProfileRequiresUnlock, File);
} else {
/* The merged profile has a non-zero length. Check that it is compatible
* with the data in this process. */
char *ProfileBuffer;
if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1 ||
munmap(ProfileBuffer, ProfileFileSize) == -1)
return unlockProfile(&ProfileRequiresUnlock, File);
}
}
/* mmap() the profile counters so long as there is at least one counter.
* If there aren't any counters, mmap() would fail with EINVAL. */
if (CountersSize > 0) {
int Fileno = fileno(File);
/* Determine how much padding is needed before/after the counters and after
* the names. */
uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
PaddingBytesAfterNames;
__llvm_profile_get_padding_sizes_for_counters(
DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
&PaddingBytesAfterCounters, &PaddingBytesAfterNames);
uint64_t PageAlignedCountersLength =
(CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters;
uint64_t FileOffsetToCounters =
CurrentFileOffset + sizeof(__llvm_profile_header) +
(DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters;
uint64_t *CounterMmap = (uint64_t *)mmap(
(void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters);
if (CounterMmap != CountersBegin) {
PROF_ERR(
"Continuous counter sync mode is enabled, but mmap() failed (%s).\n"
" - CountersBegin: %p\n"
" - PageAlignedCountersLength: %" PRIu64 "\n"
" - Fileno: %d\n"
" - FileOffsetToCounters: %" PRIu64 "\n",
strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
FileOffsetToCounters);
}
}
if (ProfileRequiresUnlock)
unlockProfile(&ProfileRequiresUnlock, File);
}
#elif defined(__ELF__) || defined(_WIN32)
#define INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR \
INSTR_PROF_CONCAT(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR, _default)
intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0;
/* This variable is a weak external reference which could be used to detect
* whether or not the compiler defined this symbol. */
-#if defined(_WIN32)
+#if defined(_MSC_VER)
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
-#pragma comment(linker, "/alternatename:" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
+#if defined(_M_IX86) || defined(__i386__)
+#define WIN_SYM_PREFIX "_"
+#else
+#define WIN_SYM_PREFIX
+#endif
+#pragma comment( \
+ linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
+ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
+ INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
#else
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
__attribute__((weak, alias(INSTR_PROF_QUOTE(
INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))));
#endif
static int writeMMappedFile(FILE *OutputFile, char **Profile) {
if (!OutputFile)
return -1;
/* Write the data into a file. */
setupIOBuffer();
ProfDataWriter fileWriter;
initFileWriter(&fileWriter, OutputFile);
if (lprofWriteData(&fileWriter, NULL, 0)) {
PROF_ERR("Failed to write profile: %s\n", strerror(errno));
return -1;
}
fflush(OutputFile);
/* Get the file size. */
uint64_t FileSize = ftell(OutputFile);
/* Map the profile. */
*Profile = (char *)mmap(
NULL, FileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(OutputFile), 0);
if (*Profile == MAP_FAILED) {
PROF_ERR("Unable to mmap profile: %s\n", strerror(errno));
return -1;
}
return 0;
}
static void initializeProfileForContinuousMode(void) {
if (!__llvm_profile_is_continuous_mode_enabled())
return;
/* This symbol is defined by the compiler when runtime counter relocation is
* used; the runtime provides a weak alias so we can check whether it is defined. */
void *BiasAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
void *BiasDefaultAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR;
if (BiasAddr == BiasDefaultAddr) {
PROF_ERR("%s\n", "__llvm_profile_counter_bias is undefined");
return;
}
/* Get the sizes of various profile data sections. Taken from
* __llvm_profile_get_size_for_buffer(). */
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
+ __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
int Length = getCurFilenameLength();
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
const char *Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return;
FILE *File = NULL;
char *Profile = NULL;
if (!doMerging()) {
File = fopen(Filename, "w+b");
if (!File)
return;
if (writeMMappedFile(File, &Profile) == -1) {
fclose(File);
return;
}
} else {
File = lprofOpenFileEx(Filename);
if (!File)
return;
uint64_t ProfileFileSize = 0;
if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) {
lprofUnlockFileHandle(File);
fclose(File);
return;
}
if (!ProfileFileSize) {
if (writeMMappedFile(File, &Profile) == -1) {
fclose(File);
return;
}
} else {
/* The merged profile has a non-zero length. Check that it is compatible
* with the data in this process. */
if (mmapProfileForMerging(File, ProfileFileSize, &Profile) == -1) {
fclose(File);
return;
}
}
lprofUnlockFileHandle(File);
}
/* Update the profile fields based on the current mapping. */
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
(intptr_t)Profile - (uintptr_t)CountersBegin +
CountersOffset;
/* Return the memory allocated for counters to OS. */
lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
}
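/* Illustrative sketch, not from the upstream sources: with runtime counter
 * relocation enabled, instrumented code is assumed to add this bias to each
 * counter's link-time address, so counter increments land inside the
 * mmap()ed profile set up above rather than in the original counters
 * section. The helper below is hypothetical. */
static uint64_t *demoRelocatedCounterAddr(const uint64_t *CounterAddr) {
  /* CounterAddr is expected to lie in [__llvm_profile_begin_counters(),
   * __llvm_profile_end_counters()). */
  return (uint64_t *)((uintptr_t)CounterAddr +
                      (uintptr_t)INSTR_PROF_PROFILE_COUNTER_BIAS_VAR);
}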
#else
static void initializeProfileForContinuousMode(void) {
PROF_ERR("%s\n", "continuous mode is unsupported on this platform");
}
#endif
static const char *DefaultProfileName = "default.profraw";
static void resetFilenameToDefault(void) {
if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
free((void *)lprofCurFilename.FilenamePat);
}
memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
lprofCurFilename.FilenamePat = DefaultProfileName;
lprofCurFilename.PNS = PNS_default;
}
static unsigned getMergePoolSize(const char *FilenamePat, int *I) {
unsigned J = 0, Num = 0;
for (;; ++J) {
char C = FilenamePat[*I + J];
if (C == 'm') {
*I += J;
return Num ? Num : 1;
}
if (C < '0' || C > '9')
break;
Num = Num * 10 + C - '0';
/* If FilenamePat[*I+J] is between '0' and '9', the next byte is guaranteed
* to be in-bound as the string is null terminated. */
}
return 0;
}
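/* Illustrative sketch, not part of the upstream diff: how the %[0-9]m
 * specifier is consumed by getMergePoolSize() above. With *I pointing just
 * past the '%', a pattern such as "x_%2m.profraw" yields a pool size of 2
 * and leaves *I on the 'm'; a bare "%m" yields 1, and a non-merge specifier
 * such as "%p" yields 0. The demo function and its literal pattern are
 * hypothetical. */
static void demoMergePoolSpecifier(void) {
  int I = 3; /* Index of '2' in "x_%2m.profraw", the character after '%'. */
  unsigned PoolSize = getMergePoolSize("x_%2m.profraw", &I);
  (void)PoolSize; /* PoolSize == 2 and I == 4 (the 'm') at this point. */
}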
/* Assert that Idx does not index past a string's null terminator. Return the
* result of the check. */
static int checkBounds(int Idx, int Strlen) {
assert(Idx <= Strlen && "Indexing past string null terminator");
return Idx <= Strlen;
}
/* Parses the pattern string \p FilenamePat and stores the result in the
* lprofCurFilename structure. */
static int parseFilenamePattern(const char *FilenamePat,
unsigned CopyFilenamePat) {
int NumPids = 0, NumHosts = 0, I;
char *PidChars = &lprofCurFilename.PidChars[0];
char *Hostname = &lprofCurFilename.Hostname[0];
int MergingEnabled = 0;
int FilenamePatLen = strlen(FilenamePat);
/* Clean up cached prefix and filename. */
if (lprofCurFilename.ProfilePathPrefix)
free((void *)lprofCurFilename.ProfilePathPrefix);
if (lprofCurFilename.FilenamePat && lprofCurFilename.OwnsFilenamePat) {
free((void *)lprofCurFilename.FilenamePat);
}
memset(&lprofCurFilename, 0, sizeof(lprofCurFilename));
if (!CopyFilenamePat)
lprofCurFilename.FilenamePat = FilenamePat;
else {
lprofCurFilename.FilenamePat = strdup(FilenamePat);
lprofCurFilename.OwnsFilenamePat = 1;
}
/* Check the filename for "%p", which indicates a pid-substitution. */
for (I = 0; checkBounds(I, FilenamePatLen) && FilenamePat[I]; ++I) {
if (FilenamePat[I] == '%') {
++I; /* Advance to the next character. */
if (!checkBounds(I, FilenamePatLen))
break;
if (FilenamePat[I] == 'p') {
if (!NumPids++) {
if (snprintf(PidChars, MAX_PID_SIZE, "%ld", (long)getpid()) <= 0) {
PROF_WARN("Unable to get pid for filename pattern %s. Using the "
"default name.",
FilenamePat);
return -1;
}
}
} else if (FilenamePat[I] == 'h') {
if (!NumHosts++)
if (COMPILER_RT_GETHOSTNAME(Hostname, COMPILER_RT_MAX_HOSTLEN)) {
PROF_WARN("Unable to get hostname for filename pattern %s. Using "
"the default name.",
FilenamePat);
return -1;
}
} else if (FilenamePat[I] == 't') {
lprofCurFilename.TmpDir = getenv("TMPDIR");
if (!lprofCurFilename.TmpDir) {
PROF_WARN("Unable to get the TMPDIR environment variable, referenced "
"in %s. Using the default path.",
FilenamePat);
return -1;
}
} else if (FilenamePat[I] == 'c') {
if (__llvm_profile_is_continuous_mode_enabled()) {
PROF_WARN("%%c specifier can only be specified once in %s.\n",
FilenamePat);
return -1;
}
#if defined(__APPLE__) || defined(__ELF__) || defined(_WIN32)
__llvm_profile_set_page_size(getpagesize());
__llvm_profile_enable_continuous_mode();
#else
PROF_WARN("%s", "Continous mode is currently only supported for Mach-O,"
" ELF and COFF formats.");
return -1;
#endif
} else {
unsigned MergePoolSize = getMergePoolSize(FilenamePat, &I);
if (!MergePoolSize)
continue;
if (MergingEnabled) {
PROF_WARN("%%m specifier can only be specified once in %s.\n",
FilenamePat);
return -1;
}
MergingEnabled = 1;
lprofCurFilename.MergePoolSize = MergePoolSize;
}
}
}
lprofCurFilename.NumPids = NumPids;
lprofCurFilename.NumHosts = NumHosts;
return 0;
}
static void parseAndSetFilename(const char *FilenamePat,
ProfileNameSpecifier PNS,
unsigned CopyFilenamePat) {
const char *OldFilenamePat = lprofCurFilename.FilenamePat;
ProfileNameSpecifier OldPNS = lprofCurFilename.PNS;
/* Keep the old profile name specifier if the new one has lower precedence. */
if (PNS < OldPNS)
return;
if (!FilenamePat)
FilenamePat = DefaultProfileName;
if (OldFilenamePat && !strcmp(OldFilenamePat, FilenamePat)) {
lprofCurFilename.PNS = PNS;
return;
}
/* When PNS >= OldPNS, the last one wins. */
if (!FilenamePat || parseFilenamePattern(FilenamePat, CopyFilenamePat))
resetFilenameToDefault();
lprofCurFilename.PNS = PNS;
if (!OldFilenamePat) {
if (getenv("LLVM_PROFILE_VERBOSE"))
PROF_NOTE("Set profile file path to \"%s\" via %s.\n",
lprofCurFilename.FilenamePat, getPNSStr(PNS));
} else {
if (getenv("LLVM_PROFILE_VERBOSE"))
PROF_NOTE("Override old profile path \"%s\" via %s to \"%s\" via %s.\n",
OldFilenamePat, getPNSStr(OldPNS), lprofCurFilename.FilenamePat,
getPNSStr(PNS));
}
truncateCurrentFile();
if (__llvm_profile_is_continuous_mode_enabled())
initializeProfileForContinuousMode();
}
/* Return the buffer length required to store the current profile
* filename with PID and hostname substitutions. */
/* The length needed to hold a uint64_t followed by a '_' and a 3-digit pool id. */
#define SIGLEN 24
static int getCurFilenameLength() {
int Len;
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize))
return strlen(lprofCurFilename.FilenamePat);
Len = strlen(lprofCurFilename.FilenamePat) +
lprofCurFilename.NumPids * (strlen(lprofCurFilename.PidChars) - 2) +
lprofCurFilename.NumHosts * (strlen(lprofCurFilename.Hostname) - 2) +
(lprofCurFilename.TmpDir ? (strlen(lprofCurFilename.TmpDir) - 1) : 0);
if (lprofCurFilename.MergePoolSize)
Len += SIGLEN;
return Len;
}
/* Return the pointer to the current profile file name (after substituting
* PIDs and hostnames in the filename pattern). \p FilenameBuf is the buffer
* to store the resulting filename. If no substitution is needed, the
* current filename pattern string is directly returned, unless ForceUseBuf
* is enabled. */
static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf) {
int I, J, PidLength, HostNameLength, TmpDirLength, FilenamePatLength;
const char *FilenamePat = lprofCurFilename.FilenamePat;
if (!lprofCurFilename.FilenamePat || !lprofCurFilename.FilenamePat[0])
return 0;
if (!(lprofCurFilename.NumPids || lprofCurFilename.NumHosts ||
lprofCurFilename.TmpDir || lprofCurFilename.MergePoolSize ||
__llvm_profile_is_continuous_mode_enabled())) {
if (!ForceUseBuf)
return lprofCurFilename.FilenamePat;
FilenamePatLength = strlen(lprofCurFilename.FilenamePat);
memcpy(FilenameBuf, lprofCurFilename.FilenamePat, FilenamePatLength);
FilenameBuf[FilenamePatLength] = '\0';
return FilenameBuf;
}
PidLength = strlen(lprofCurFilename.PidChars);
HostNameLength = strlen(lprofCurFilename.Hostname);
TmpDirLength = lprofCurFilename.TmpDir ? strlen(lprofCurFilename.TmpDir) : 0;
/* Construct the new filename. */
for (I = 0, J = 0; FilenamePat[I]; ++I)
if (FilenamePat[I] == '%') {
if (FilenamePat[++I] == 'p') {
memcpy(FilenameBuf + J, lprofCurFilename.PidChars, PidLength);
J += PidLength;
} else if (FilenamePat[I] == 'h') {
memcpy(FilenameBuf + J, lprofCurFilename.Hostname, HostNameLength);
J += HostNameLength;
} else if (FilenamePat[I] == 't') {
memcpy(FilenameBuf + J, lprofCurFilename.TmpDir, TmpDirLength);
FilenameBuf[J + TmpDirLength] = DIR_SEPARATOR;
J += TmpDirLength + 1;
} else {
if (!getMergePoolSize(FilenamePat, &I))
continue;
char LoadModuleSignature[SIGLEN + 1];
int S;
int ProfilePoolId = getpid() % lprofCurFilename.MergePoolSize;
S = snprintf(LoadModuleSignature, SIGLEN + 1, "%" PRIu64 "_%d",
lprofGetLoadModuleSignature(), ProfilePoolId);
if (S == -1 || S > SIGLEN)
S = SIGLEN;
memcpy(FilenameBuf + J, LoadModuleSignature, S);
J += S;
}
/* Drop any unknown substitutions. */
} else
FilenameBuf[J++] = FilenamePat[I];
FilenameBuf[J] = 0;
return FilenameBuf;
}
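/* Illustrative sketch, not part of the upstream diff: the expansion performed
 * by getCurFilename() above. With PidChars "1234" and Hostname "node1", the
 * pattern "prof_%p_%h.profraw" becomes "prof_1234_node1.profraw"; "%t" expands
 * to $TMPDIR plus a directory separator, and unrecognized substitutions are
 * dropped. Callers size the buffer with getCurFilenameLength() first, as the
 * other routines in this file do; the helper below is hypothetical. */
static void demoExpandCurFilename(void) {
  int Length = getCurFilenameLength();
  char *Buf = (char *)COMPILER_RT_ALLOCA(Length + 1);
  const char *Name = getCurFilename(Buf, 0);
  (void)Name; /* Name is Buf after substitution, or the raw pattern when no
               * specifiers are present and ForceUseBuf is 0. */
}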
/* Returns the pointer to the environment variable
* string. Returns null if the env var is not set. */
static const char *getFilenamePatFromEnv(void) {
const char *Filename = getenv("LLVM_PROFILE_FILE");
if (!Filename || !Filename[0])
return 0;
return Filename;
}
COMPILER_RT_VISIBILITY
const char *__llvm_profile_get_path_prefix(void) {
int Length;
char *FilenameBuf, *Prefix;
const char *Filename, *PrefixEnd;
if (lprofCurFilename.ProfilePathPrefix)
return lprofCurFilename.ProfilePathPrefix;
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
if (!Filename)
return "\0";
PrefixEnd = lprofFindLastDirSeparator(Filename);
if (!PrefixEnd)
return "\0";
Length = PrefixEnd - Filename + 1;
Prefix = (char *)malloc(Length + 1);
if (!Prefix) {
PROF_ERR("Failed to %s\n", "allocate memory.");
return "\0";
}
memcpy(Prefix, Filename, Length);
Prefix[Length] = '\0';
lprofCurFilename.ProfilePathPrefix = Prefix;
return Prefix;
}
COMPILER_RT_VISIBILITY
const char *__llvm_profile_get_filename(void) {
int Length;
char *FilenameBuf;
const char *Filename;
Length = getCurFilenameLength();
FilenameBuf = (char *)malloc(Length + 1);
if (!FilenameBuf) {
PROF_ERR("Failed to %s\n", "allocate memory.");
return "\0";
}
Filename = getCurFilename(FilenameBuf, 1);
if (!Filename)
return "\0";
return FilenameBuf;
}
/* This API initializes the file handling. Both the user-specified
* profile path (via -fprofile-instr-generate=) and the LLVM_PROFILE_FILE
* environment variable can override the default value.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize_file(void) {
const char *EnvFilenamePat;
const char *SelectedPat = NULL;
ProfileNameSpecifier PNS = PNS_unknown;
int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0);
EnvFilenamePat = getFilenamePatFromEnv();
if (EnvFilenamePat) {
/* Pass CopyFilenamePat = 1 to ensure that the filename is still valid
when __llvm_profile_write_file() gets executed. */
parseAndSetFilename(EnvFilenamePat, PNS_environment, 1);
return;
} else if (hasCommandLineOverrider) {
SelectedPat = INSTR_PROF_PROFILE_NAME_VAR;
PNS = PNS_command_line;
} else {
SelectedPat = NULL;
PNS = PNS_default;
}
parseAndSetFilename(SelectedPat, PNS, 0);
}
/* This method is invoked by the runtime initialization hook
* InstrProfilingRuntime.o if it is linked in.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize(void) {
__llvm_profile_initialize_file();
if (!__llvm_profile_is_continuous_mode_enabled())
__llvm_profile_register_write_file_atexit();
}
/* This API is directly called by the user application code. It has the
* highest precedence compared with LLVM_PROFILE_FILE environment variable
* and command line option -fprofile-instr-generate=<profile_name>.
*/
COMPILER_RT_VISIBILITY
void __llvm_profile_set_filename(const char *FilenamePat) {
if (__llvm_profile_is_continuous_mode_enabled())
return;
parseAndSetFilename(FilenamePat, PNS_runtime_api, 1);
}
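/* Illustrative sketch, not part of the upstream diff: how instrumented
 * application code (built with -fprofile-instr-generate) might drive these
 * entry points at run time. Both functions are declared in InstrProfiling.h,
 * included above; the helper name and the file pattern are hypothetical. */
static void demoSwitchProfileAndDump(void) {
  /* The runtime API takes the highest precedence over LLVM_PROFILE_FILE and
   * -fprofile-instr-generate=, and is ignored in continuous mode. */
  __llvm_profile_set_filename("phase1_%p.profraw");
  /* Flush the counters accumulated so far into the newly selected file. */
  if (__llvm_profile_write_file() != 0)
    PROF_ERR("%s\n", "failed to write phase profile");
}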
/* The public API for writing profile data into the file with name
* set by previous calls to __llvm_profile_set_filename or
* __llvm_profile_override_default_filename or
* __llvm_profile_initialize_file. */
COMPILER_RT_VISIBILITY
int __llvm_profile_write_file(void) {
int rc, Length;
const char *Filename;
char *FilenameBuf;
int PDeathSig = 0;
if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) {
PROF_NOTE("Profile data not written to file: %s.\n", "already written");
return 0;
}
Length = getCurFilenameLength();
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 0);
/* Check the filename. */
if (!Filename) {
PROF_ERR("Failed to write file : %s\n", "Filename not set");
return -1;
}
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
PROF_ERR("Runtime and instrumentation version mismatch : "
"expected %d, but get %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return -1;
}
// Temporarily suspend getting SIGKILL when the parent exits.
PDeathSig = lprofSuspendSigKill();
/* Write profile data to the file. */
rc = writeFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
// Restore SIGKILL.
if (PDeathSig == 1)
lprofRestoreSigKill();
return rc;
}
COMPILER_RT_VISIBILITY
int __llvm_profile_dump(void) {
if (!doMerging())
PROF_WARN("Later invocation of __llvm_profile_dump can lead to clobbering "
" of previously dumped profile data : %s. Either use %%m "
"in profile name or change profile name before dumping.\n",
"online profile merging is not on");
int rc = __llvm_profile_write_file();
lprofSetProfileDumped(1);
return rc;
}
/* Order file data will be saved in a file with suffix .order. */
static const char *OrderFileSuffix = ".order";
COMPILER_RT_VISIBILITY
int __llvm_orderfile_write_file(void) {
int rc, Length, LengthBeforeAppend, SuffixLength;
const char *Filename;
char *FilenameBuf;
int PDeathSig = 0;
SuffixLength = strlen(OrderFileSuffix);
Length = getCurFilenameLength() + SuffixLength;
FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
Filename = getCurFilename(FilenameBuf, 1);
/* Check the filename. */
if (!Filename) {
PROF_ERR("Failed to write file : %s\n", "Filename not set");
return -1;
}
/* Append order file suffix */
LengthBeforeAppend = strlen(Filename);
memcpy(FilenameBuf + LengthBeforeAppend, OrderFileSuffix, SuffixLength);
FilenameBuf[LengthBeforeAppend + SuffixLength] = '\0';
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
PROF_ERR("Runtime and instrumentation version mismatch : "
"expected %d, but get %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return -1;
}
// Temporarily suspend getting SIGKILL when the parent exits.
PDeathSig = lprofSuspendSigKill();
/* Write order data to the file. */
rc = writeOrderFile(Filename);
if (rc)
PROF_ERR("Failed to write file \"%s\": %s\n", Filename, strerror(errno));
// Restore SIGKILL.
if (PDeathSig == 1)
lprofRestoreSigKill();
return rc;
}
COMPILER_RT_VISIBILITY
int __llvm_orderfile_dump(void) {
int rc = __llvm_orderfile_write_file();
return rc;
}
static void writeFileWithoutReturn(void) { __llvm_profile_write_file(); }
COMPILER_RT_VISIBILITY
int __llvm_profile_register_write_file_atexit(void) {
static int HasBeenRegistered = 0;
if (HasBeenRegistered)
return 0;
lprofSetupValueProfiler();
HasBeenRegistered = 1;
return atexit(writeFileWithoutReturn);
}
#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
index 0146b14c193f..1be0ef36a288 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
@@ -1,192 +1,193 @@
/*===- InstrProfilingPlatformFuchsia.c - Profile data Fuchsia platform ----===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
/*
* This file implements the profiling runtime for Fuchsia and defines the
* shared profile runtime interface. Each module (executable or DSO) statically
* links in the whole profile runtime to satisfy the calls from its
* instrumented code. Several modules in the same program might be separately
* compiled and even use different versions of the instrumentation ABI and data
* format. All they share in common is the VMO and the offset, which live in
* exported globals so that exactly one definition will be shared across all
* modules. Each module has its own independent runtime that registers its own
* atexit hook to append its own data into the shared VMO which is published
* via the data sink hook provided by Fuchsia's dynamic linker.
*/
#if defined(__Fuchsia__)
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdlib.h>
#include <zircon/process.h>
#include <zircon/sanitizer.h>
#include <zircon/status.h>
#include <zircon/syscalls.h>
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingUtil.h"
/* This variable is an external reference to a symbol defined by the compiler. */
COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
COMPILER_RT_VISIBILITY unsigned lprofProfileDumped() {
return 1;
}
COMPILER_RT_VISIBILITY void lprofSetProfileDumped(unsigned Value) {}
static const char ProfileSinkName[] = "llvm-profile";
static inline void lprofWrite(const char *fmt, ...) {
char s[256];
va_list ap;
va_start(ap, fmt);
int ret = vsnprintf(s, sizeof(s), fmt, ap);
va_end(ap);
__sanitizer_log_write(s, ret + 1);
}
struct lprofVMOWriterCtx {
/* VMO that contains the profile data for this module. */
zx_handle_t Vmo;
/* Current offset within the VMO where data should be written next. */
uint64_t Offset;
};
static uint32_t lprofVMOWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs,
uint32_t NumIOVecs) {
struct lprofVMOWriterCtx *Ctx = (struct lprofVMOWriterCtx *)This->WriterCtx;
/* Compute the total length of data to be written. */
size_t Length = 0;
for (uint32_t I = 0; I < NumIOVecs; I++)
Length += IOVecs[I].ElmSize * IOVecs[I].NumElm;
/* Resize the VMO to ensure there's sufficient space for the data. */
zx_status_t Status = _zx_vmo_set_size(Ctx->Vmo, Ctx->Offset + Length);
if (Status != ZX_OK)
return -1;
/* Copy the data into VMO. */
for (uint32_t I = 0; I < NumIOVecs; I++) {
size_t Length = IOVecs[I].ElmSize * IOVecs[I].NumElm;
if (IOVecs[I].Data) {
Status = _zx_vmo_write(Ctx->Vmo, IOVecs[I].Data, Ctx->Offset, Length);
if (Status != ZX_OK)
return -1;
} else if (IOVecs[I].UseZeroPadding) {
/* Resizing the VMO should zero fill. */
}
Ctx->Offset += Length;
}
/* Record the profile size as a property of the VMO. */
_zx_object_set_property(Ctx->Vmo, ZX_PROP_VMO_CONTENT_SIZE, &Ctx->Offset,
sizeof(Ctx->Offset));
return 0;
}
static void initVMOWriter(ProfDataWriter *This, struct lprofVMOWriterCtx *Ctx) {
This->Write = lprofVMOWriter;
This->WriterCtx = Ctx;
}
/* This method is invoked by the runtime initialization hook
* InstrProfilingRuntime.o if it is linked in. */
COMPILER_RT_VISIBILITY
void __llvm_profile_initialize(void) {
/* Check if there is llvm/runtime version mismatch. */
if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) {
lprofWrite("LLVM Profile: runtime and instrumentation version mismatch: "
"expected %d, but got %d\n",
INSTR_PROF_RAW_VERSION,
(int)GET_VERSION(__llvm_profile_get_version()));
return;
}
const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ const uint64_t CountersOffset = sizeof(__llvm_profile_header) +
+ __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
uint64_t CountersSize = CountersEnd - CountersBegin;
/* Don't publish a VMO if there are no counters. */
if (!CountersSize)
return;
zx_status_t Status;
/* Create a VMO to hold the profile data. */
zx_handle_t Vmo = ZX_HANDLE_INVALID;
Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: cannot create VMO: %s\n",
_zx_status_get_string(Status));
return;
}
/* Give the VMO a name that includes the module signature. */
char VmoName[ZX_MAX_NAME_LEN];
snprintf(VmoName, sizeof(VmoName), "%" PRIu64 ".profraw",
lprofGetLoadModuleSignature());
_zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName));
/* Write the profile data into the mapped region. */
ProfDataWriter VMOWriter;
struct lprofVMOWriterCtx Ctx = {.Vmo = Vmo, .Offset = 0};
initVMOWriter(&VMOWriter, &Ctx);
if (lprofWriteData(&VMOWriter, 0, 0) != 0) {
lprofWrite("LLVM Profile: failed to write data\n");
_zx_handle_close(Vmo);
return;
}
uint64_t Len = 0;
Status = _zx_vmo_get_size(Vmo, &Len);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: failed to get the VMO size: %s\n",
_zx_status_get_string(Status));
_zx_handle_close(Vmo);
return;
}
uintptr_t Mapping;
Status =
_zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
Vmo, 0, Len, &Mapping);
if (Status != ZX_OK) {
lprofWrite("LLVM Profile: failed to map the VMO: %s\n",
_zx_status_get_string(Status));
_zx_handle_close(Vmo);
return;
}
/* Publish the VMO which contains profile data to the system. Note that this
* also consumes the VMO handle. */
__sanitizer_publish_data(ProfileSinkName, Vmo);
/* Use the dumpfile symbolizer markup element to write the name of VMO. */
lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName);
/* Update the profile fields based on the current mapping. */
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
(intptr_t)Mapping - (uintptr_t)CountersBegin + CountersOffset;
/* Return the memory allocated for counters to OS. */
lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
}
#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 7c15f97aff89..5d47083b8bfe 100644
--- a/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -1,202 +1,203 @@
/*===- InstrProfilingPlatformLinux.c - Profile data Linux platform ------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
(defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__)
#include <elf.h>
#include <link.h>
#include <stdlib.h>
#include <string.h>
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#if defined(__FreeBSD__) && !defined(ElfW)
/*
* FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet.
* If this is added to all supported FreeBSD versions in the future, this
* compatibility macro can be removed.
*/
#define ElfW(type) __ElfN(type)
#endif
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)
#define PROF_ORDERFILE_START INSTR_PROF_SECT_START(INSTR_PROF_ORDERFILE_COMMON)
#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON)
#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON)
/* Declare section start and stop symbols for various sections
* generated by compiler instrumentation.
*/
extern __llvm_profile_data PROF_DATA_START COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
COMPILER_RT_WEAK;
extern uint64_t PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern uint64_t PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern uint32_t PROF_ORDERFILE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern ValueProfNode PROF_VNODES_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_begin_data(void) {
return &PROF_DATA_START;
}
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_end_data(void) {
return &PROF_DATA_STOP;
}
COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
return &PROF_NAME_START;
}
COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
return &PROF_NAME_STOP;
}
COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_begin_counters(void) {
return &PROF_CNTS_START;
}
COMPILER_RT_VISIBILITY uint64_t *__llvm_profile_end_counters(void) {
return &PROF_CNTS_STOP;
}
COMPILER_RT_VISIBILITY uint32_t *__llvm_profile_begin_orderfile(void) {
return &PROF_ORDERFILE_START;
}
COMPILER_RT_VISIBILITY ValueProfNode *
__llvm_profile_begin_vnodes(void) {
return &PROF_VNODES_START;
}
COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
return &PROF_VNODES_STOP;
}
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
#ifdef NT_GNU_BUILD_ID
static size_t RoundUp(size_t size, size_t align) {
return (size + align - 1) & ~(align - 1);
}
/*
* Write binary id length and then its data, because binary id does not
* have a fixed length.
*/
-int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
- const uint8_t *BinaryIdData) {
+static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
+ const uint8_t *BinaryIdData) {
ProfDataIOVec BinaryIdIOVec[] = {
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
{BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
if (Writer->Write(Writer, BinaryIdIOVec,
sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
return -1;
/* Successfully wrote binary id, report success. */
return 0;
}
/*
* Look for the note that has the name "GNU\0" and type NT_GNU_BUILD_ID
* that contains build id. If build id exists, write binary id.
*
* Each note in notes section starts with a struct which includes
* n_namesz, n_descsz, and n_type members. It is followed by the name
* (whose length is defined in n_namesz) and then by the descriptor
* (whose length is defined in n_descsz).
*
* Note sections like .note.ABI-tag and .note.gnu.build-id are aligned
* to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes.
*/
-int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
+static int WriteBinaryIdForNote(ProfDataWriter *Writer,
+ const ElfW(Nhdr) * Note) {
int BinaryIdSize = 0;
const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr));
if (Note->n_type == NT_GNU_BUILD_ID && Note->n_namesz == 4 &&
memcmp(NoteName, "GNU\0", 4) == 0) {
uint64_t BinaryIdLen = Note->n_descsz;
const uint8_t *BinaryIdData =
(const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
if (Writer != NULL &&
WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData) == -1)
return -1;
BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen;
}
return BinaryIdSize;
}
/*
* Helper function that iterates through the notes section and finds build ids.
* If writer is given, write binary ids into profiles.
* If an error happens while writing, return -1.
*/
-int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
- const ElfW(Nhdr) * NotesEnd) {
+static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
+ const ElfW(Nhdr) * NotesEnd) {
int TotalBinaryIdsSize = 0;
while (Note < NotesEnd) {
int Result = WriteBinaryIdForNote(Writer, Note);
if (Result == -1)
return -1;
TotalBinaryIdsSize += Result;
/* Calculate the offset of the next note in notes section. */
size_t NoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(Note->n_namesz, 4) +
RoundUp(Note->n_descsz, 4);
Note = (const ElfW(Nhdr) *)((const char *)(Note) + NoteOffset);
}
return TotalBinaryIdsSize;
}
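/* Illustrative sketch, not part of the upstream diff: size accounting for a
 * typical NT_GNU_BUILD_ID note carrying a 20-byte (SHA-1 style) build id,
 * i.e. n_namesz == 4 for "GNU\0" and n_descsz == 20. The demo values are
 * hypothetical. */
static void demoBuildIdNoteSizes(void) {
  uint64_t BinaryIdLen = 20;
  /* Bytes WriteBinaryIdForNote() contributes to the raw profile: an 8-byte
   * length field followed by the id itself, 28 bytes in total. */
  size_t ProfileBytes = sizeof(BinaryIdLen) + (size_t)BinaryIdLen;
  /* Offset from this note header to the next note in the PT_NOTE segment,
   * with the name and descriptor rounded up to 4-byte alignment. */
  size_t NextNoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(4, 4) + RoundUp(20, 4);
  (void)ProfileBytes;
  (void)NextNoteOffset;
}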
/*
* Write binary ids into profiles if writer is given.
* Return the total size of binary ids.
* If an error happens while writing, return -1.
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
const ElfW(Phdr) *ProgramHeader =
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
uint32_t I;
/* Iterate through entries in the program header. */
for (I = 0; I < ElfHeader->e_phnum; I++) {
/* Look for the notes section in program header entries. */
if (ProgramHeader[I].p_type != PT_NOTE)
continue;
const ElfW(Nhdr) *Note =
(const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_offset);
const ElfW(Nhdr) *NotesEnd =
(const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_filesz);
return WriteBinaryIds(Writer, Note, NotesEnd);
}
return 0;
}
#else /* !NT_GNU_BUILD_ID */
/*
* Fallback implementation for targets that don't support the GNU
* extensions NT_GNU_BUILD_ID and __ehdr_start.
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
return 0;
}
#endif
#endif
diff --git a/contrib/llvm-project/libcxx/include/cwctype b/contrib/llvm-project/libcxx/include/cwctype
index 17c68d6d4544..27eea2f15730 100644
--- a/contrib/llvm-project/libcxx/include/cwctype
+++ b/contrib/llvm-project/libcxx/include/cwctype
@@ -1,86 +1,88 @@
// -*- C++ -*-
//===--------------------------- cwctype ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_CWCTYPE
#define _LIBCPP_CWCTYPE
/*
cwctype synopsis
Macros:
WEOF
namespace std
{
Types:
wint_t
wctrans_t
wctype_t
int iswalnum(wint_t wc);
int iswalpha(wint_t wc);
int iswblank(wint_t wc); // C99
int iswcntrl(wint_t wc);
int iswdigit(wint_t wc);
int iswgraph(wint_t wc);
int iswlower(wint_t wc);
int iswprint(wint_t wc);
int iswpunct(wint_t wc);
int iswspace(wint_t wc);
int iswupper(wint_t wc);
int iswxdigit(wint_t wc);
int iswctype(wint_t wc, wctype_t desc);
wctype_t wctype(const char* property);
wint_t towlower(wint_t wc);
wint_t towupper(wint_t wc);
wint_t towctrans(wint_t wc, wctrans_t desc);
wctrans_t wctrans(const char* property);
} // std
*/
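// Illustrative usage only (not part of the header): the classification and
// conversion functions mirror their <cctype> counterparts for wide
// characters, e.g. std::iswdigit(L'7') returns nonzero and
// std::towupper(L'a') yields L'A' in the default locale.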
#include <__config>
#include <cctype>
#include <wctype.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
+#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H)
using ::wint_t _LIBCPP_USING_IF_EXISTS;
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
using ::iswalnum _LIBCPP_USING_IF_EXISTS;
using ::iswalpha _LIBCPP_USING_IF_EXISTS;
using ::iswblank _LIBCPP_USING_IF_EXISTS;
using ::iswcntrl _LIBCPP_USING_IF_EXISTS;
using ::iswdigit _LIBCPP_USING_IF_EXISTS;
using ::iswgraph _LIBCPP_USING_IF_EXISTS;
using ::iswlower _LIBCPP_USING_IF_EXISTS;
using ::iswprint _LIBCPP_USING_IF_EXISTS;
using ::iswpunct _LIBCPP_USING_IF_EXISTS;
using ::iswspace _LIBCPP_USING_IF_EXISTS;
using ::iswupper _LIBCPP_USING_IF_EXISTS;
using ::iswxdigit _LIBCPP_USING_IF_EXISTS;
using ::iswctype _LIBCPP_USING_IF_EXISTS;
using ::wctype _LIBCPP_USING_IF_EXISTS;
using ::towlower _LIBCPP_USING_IF_EXISTS;
using ::towupper _LIBCPP_USING_IF_EXISTS;
using ::towctrans _LIBCPP_USING_IF_EXISTS;
using ::wctrans _LIBCPP_USING_IF_EXISTS;
+#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_CWCTYPE
diff --git a/contrib/llvm-project/libcxx/include/string b/contrib/llvm-project/libcxx/include/string
index 4940021b0c68..4159ea580345 100644
--- a/contrib/llvm-project/libcxx/include/string
+++ b/contrib/llvm-project/libcxx/include/string
@@ -1,4566 +1,4585 @@
// -*- C++ -*-
//===--------------------------- string -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_STRING
#define _LIBCPP_STRING
/*
string synopsis
namespace std
{
template <class stateT>
class fpos
{
private:
stateT st;
public:
fpos(streamoff = streamoff());
operator streamoff() const;
stateT state() const;
void state(stateT);
fpos& operator+=(streamoff);
fpos operator+ (streamoff) const;
fpos& operator-=(streamoff);
fpos operator- (streamoff) const;
};
template <class stateT> streamoff operator-(const fpos<stateT>& x, const fpos<stateT>& y);
template <class stateT> bool operator==(const fpos<stateT>& x, const fpos<stateT>& y);
template <class stateT> bool operator!=(const fpos<stateT>& x, const fpos<stateT>& y);
template <class charT>
struct char_traits
{
typedef charT char_type;
typedef ... int_type;
typedef streamoff off_type;
typedef streampos pos_type;
typedef mbstate_t state_type;
static void assign(char_type& c1, const char_type& c2) noexcept;
static constexpr bool eq(char_type c1, char_type c2) noexcept;
static constexpr bool lt(char_type c1, char_type c2) noexcept;
static int compare(const char_type* s1, const char_type* s2, size_t n);
static size_t length(const char_type* s);
static const char_type* find(const char_type* s, size_t n, const char_type& a);
static char_type* move(char_type* s1, const char_type* s2, size_t n);
static char_type* copy(char_type* s1, const char_type* s2, size_t n);
static char_type* assign(char_type* s, size_t n, char_type a);
static constexpr int_type not_eof(int_type c) noexcept;
static constexpr char_type to_char_type(int_type c) noexcept;
static constexpr int_type to_int_type(char_type c) noexcept;
static constexpr bool eq_int_type(int_type c1, int_type c2) noexcept;
static constexpr int_type eof() noexcept;
};
template <> struct char_traits<char>;
template <> struct char_traits<wchar_t>;
template <> struct char_traits<char8_t>; // C++20
template <> struct char_traits<char16_t>;
template <> struct char_traits<char32_t>;
template<class charT, class traits = char_traits<charT>, class Allocator = allocator<charT> >
class basic_string
{
public:
// types:
typedef traits traits_type;
typedef typename traits_type::char_type value_type;
typedef Allocator allocator_type;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef typename allocator_type::reference reference;
typedef typename allocator_type::const_reference const_reference;
typedef typename allocator_type::pointer pointer;
typedef typename allocator_type::const_pointer const_pointer;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
static const size_type npos = -1;
basic_string()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit basic_string(const allocator_type& a);
basic_string(const basic_string& str);
basic_string(basic_string&& str)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
basic_string(const basic_string& str, size_type pos,
const allocator_type& a = allocator_type());
basic_string(const basic_string& str, size_type pos, size_type n,
const Allocator& a = Allocator());
template<class T>
basic_string(const T& t, size_type pos, size_type n, const Allocator& a = Allocator()); // C++17
template <class T>
explicit basic_string(const T& t, const Allocator& a = Allocator()); // C++17
basic_string(const value_type* s, const allocator_type& a = allocator_type());
basic_string(const value_type* s, size_type n, const allocator_type& a = allocator_type());
basic_string(nullptr_t) = delete; // C++2b
basic_string(size_type n, value_type c, const allocator_type& a = allocator_type());
template<class InputIterator>
basic_string(InputIterator begin, InputIterator end,
const allocator_type& a = allocator_type());
basic_string(initializer_list<value_type>, const Allocator& = Allocator());
basic_string(const basic_string&, const Allocator&);
basic_string(basic_string&&, const Allocator&);
~basic_string();
operator basic_string_view<charT, traits>() const noexcept;
basic_string& operator=(const basic_string& str);
template <class T>
basic_string& operator=(const T& t); // C++17
basic_string& operator=(basic_string&& str)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value ); // C++17
basic_string& operator=(const value_type* s);
basic_string& operator=(nullptr_t) = delete; // C++2b
basic_string& operator=(value_type c);
basic_string& operator=(initializer_list<value_type>);
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type length() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
void resize(size_type n, value_type c);
void resize(size_type n);
void reserve(size_type res_arg);
void reserve(); // deprecated in C++20
void shrink_to_fit();
void clear() noexcept;
bool empty() const noexcept;
const_reference operator[](size_type pos) const;
reference operator[](size_type pos);
const_reference at(size_type n) const;
reference at(size_type n);
basic_string& operator+=(const basic_string& str);
template <class T>
basic_string& operator+=(const T& t); // C++17
basic_string& operator+=(const value_type* s);
basic_string& operator+=(value_type c);
basic_string& operator+=(initializer_list<value_type>);
basic_string& append(const basic_string& str);
template <class T>
basic_string& append(const T& t); // C++17
basic_string& append(const basic_string& str, size_type pos, size_type n=npos); //C++14
template <class T>
basic_string& append(const T& t, size_type pos, size_type n=npos); // C++17
basic_string& append(const value_type* s, size_type n);
basic_string& append(const value_type* s);
basic_string& append(size_type n, value_type c);
template<class InputIterator>
basic_string& append(InputIterator first, InputIterator last);
basic_string& append(initializer_list<value_type>);
void push_back(value_type c);
void pop_back();
reference front();
const_reference front() const;
reference back();
const_reference back() const;
basic_string& assign(const basic_string& str);
template <class T>
basic_string& assign(const T& t); // C++17
basic_string& assign(basic_string&& str);
basic_string& assign(const basic_string& str, size_type pos, size_type n=npos); // C++14
template <class T>
basic_string& assign(const T& t, size_type pos, size_type n=npos); // C++17
basic_string& assign(const value_type* s, size_type n);
basic_string& assign(const value_type* s);
basic_string& assign(size_type n, value_type c);
template<class InputIterator>
basic_string& assign(InputIterator first, InputIterator last);
basic_string& assign(initializer_list<value_type>);
basic_string& insert(size_type pos1, const basic_string& str);
template <class T>
basic_string& insert(size_type pos1, const T& t);
basic_string& insert(size_type pos1, const basic_string& str,
size_type pos2, size_type n);
template <class T>
basic_string& insert(size_type pos1, const T& t, size_type pos2, size_type n); // C++17
basic_string& insert(size_type pos, const value_type* s, size_type n=npos); //C++14
basic_string& insert(size_type pos, const value_type* s);
basic_string& insert(size_type pos, size_type n, value_type c);
iterator insert(const_iterator p, value_type c);
iterator insert(const_iterator p, size_type n, value_type c);
template<class InputIterator>
iterator insert(const_iterator p, InputIterator first, InputIterator last);
iterator insert(const_iterator p, initializer_list<value_type>);
basic_string& erase(size_type pos = 0, size_type n = npos);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
basic_string& replace(size_type pos1, size_type n1, const basic_string& str);
template <class T>
basic_string& replace(size_type pos1, size_type n1, const T& t); // C++17
basic_string& replace(size_type pos1, size_type n1, const basic_string& str,
size_type pos2, size_type n2=npos); // C++14
template <class T>
basic_string& replace(size_type pos1, size_type n1, const T& t,
size_type pos2, size_type n); // C++17
basic_string& replace(size_type pos, size_type n1, const value_type* s, size_type n2);
basic_string& replace(size_type pos, size_type n1, const value_type* s);
basic_string& replace(size_type pos, size_type n1, size_type n2, value_type c);
basic_string& replace(const_iterator i1, const_iterator i2, const basic_string& str);
template <class T>
basic_string& replace(const_iterator i1, const_iterator i2, const T& t); // C++17
basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s, size_type n);
basic_string& replace(const_iterator i1, const_iterator i2, const value_type* s);
basic_string& replace(const_iterator i1, const_iterator i2, size_type n, value_type c);
template<class InputIterator>
basic_string& replace(const_iterator i1, const_iterator i2, InputIterator j1, InputIterator j2);
basic_string& replace(const_iterator i1, const_iterator i2, initializer_list<value_type>);
size_type copy(value_type* s, size_type n, size_type pos = 0) const;
basic_string substr(size_type pos = 0, size_type n = npos) const;
void swap(basic_string& str)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
const value_type* c_str() const noexcept;
const value_type* data() const noexcept;
value_type* data() noexcept; // C++17
allocator_type get_allocator() const noexcept;
size_type find(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find(const value_type* s, size_type pos = 0) const noexcept;
size_type find(value_type c, size_type pos = 0) const noexcept;
size_type rfind(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type rfind(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type rfind(const value_type* s, size_type pos, size_type n) const noexcept;
size_type rfind(const value_type* s, size_type pos = npos) const noexcept;
size_type rfind(value_type c, size_type pos = npos) const noexcept;
size_type find_first_of(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find_first_of(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find_first_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_first_of(const value_type* s, size_type pos = 0) const noexcept;
size_type find_first_of(value_type c, size_type pos = 0) const noexcept;
size_type find_last_of(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type find_last_of(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type find_last_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_last_of(const value_type* s, size_type pos = npos) const noexcept;
size_type find_last_of(value_type c, size_type pos = npos) const noexcept;
size_type find_first_not_of(const basic_string& str, size_type pos = 0) const noexcept;
template <class T>
size_type find_first_not_of(const T& t, size_type pos = 0) const noexcept; // C++17, noexcept as an extension
size_type find_first_not_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_first_not_of(const value_type* s, size_type pos = 0) const noexcept;
size_type find_first_not_of(value_type c, size_type pos = 0) const noexcept;
size_type find_last_not_of(const basic_string& str, size_type pos = npos) const noexcept;
template <class T>
size_type find_last_not_of(const T& t, size_type pos = npos) const noexcept; // C++17, noexcept as an extension
size_type find_last_not_of(const value_type* s, size_type pos, size_type n) const noexcept;
size_type find_last_not_of(const value_type* s, size_type pos = npos) const noexcept;
size_type find_last_not_of(value_type c, size_type pos = npos) const noexcept;
int compare(const basic_string& str) const noexcept;
template <class T>
int compare(const T& t) const noexcept; // C++17, noexcept as an extension
int compare(size_type pos1, size_type n1, const basic_string& str) const;
template <class T>
int compare(size_type pos1, size_type n1, const T& t) const; // C++17
int compare(size_type pos1, size_type n1, const basic_string& str,
size_type pos2, size_type n2=npos) const; // C++14
template <class T>
int compare(size_type pos1, size_type n1, const T& t,
size_type pos2, size_type n2=npos) const; // C++17
int compare(const value_type* s) const noexcept;
int compare(size_type pos1, size_type n1, const value_type* s) const;
int compare(size_type pos1, size_type n1, const value_type* s, size_type n2) const;
bool starts_with(basic_string_view<charT, traits> sv) const noexcept; // C++20
bool starts_with(charT c) const noexcept; // C++20
bool starts_with(const charT* s) const; // C++20
bool ends_with(basic_string_view<charT, traits> sv) const noexcept; // C++20
bool ends_with(charT c) const noexcept; // C++20
bool ends_with(const charT* s) const; // C++20
constexpr bool contains(basic_string_view<charT, traits> sv) const noexcept; // C++2b
constexpr bool contains(charT c) const noexcept; // C++2b
constexpr bool contains(const charT* s) const; // C++2b
bool __invariants() const;
};
template<class InputIterator,
class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
basic_string(InputIterator, InputIterator, Allocator = Allocator())
-> basic_string<typename iterator_traits<InputIterator>::value_type,
char_traits<typename iterator_traits<InputIterator>::value_type>,
Allocator>; // C++17
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const charT* lhs , const basic_string<charT,traits,Allocator>&rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(charT lhs, const basic_string<charT,traits,Allocator>& rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs);
template<class charT, class traits, class Allocator>
basic_string<charT, traits, Allocator>
operator+(const basic_string<charT, traits, Allocator>& lhs, charT rhs);
template<class charT, class traits, class Allocator>
bool operator==(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator==(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator==(const basic_string<charT,traits,Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const basic_string<charT,traits,Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator!=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator< (const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator> (const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator<=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const basic_string<charT, traits, Allocator>& lhs,
const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const basic_string<charT, traits, Allocator>& lhs, const charT* rhs) noexcept;
template<class charT, class traits, class Allocator>
bool operator>=(const charT* lhs, const basic_string<charT, traits, Allocator>& rhs) noexcept;
template<class charT, class traits, class Allocator>
void swap(basic_string<charT, traits, Allocator>& lhs,
basic_string<charT, traits, Allocator>& rhs)
noexcept(noexcept(lhs.swap(rhs)));
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
operator>>(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
getline(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str,
charT delim);
template<class charT, class traits, class Allocator>
basic_istream<charT, traits>&
getline(basic_istream<charT, traits>& is, basic_string<charT, traits, Allocator>& str);
template<class charT, class traits, class Allocator, class U>
typename basic_string<charT, traits, Allocator>::size_type
erase(basic_string<charT, traits, Allocator>& c, const U& value); // C++20
template<class charT, class traits, class Allocator, class Predicate>
typename basic_string<charT, traits, Allocator>::size_type
erase_if(basic_string<charT, traits, Allocator>& c, Predicate pred); // C++20
typedef basic_string<char> string;
typedef basic_string<wchar_t> wstring;
typedef basic_string<char8_t> u8string; // C++20
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
int stoi (const string& str, size_t* idx = nullptr, int base = 10);
long stol (const string& str, size_t* idx = nullptr, int base = 10);
unsigned long stoul (const string& str, size_t* idx = nullptr, int base = 10);
long long stoll (const string& str, size_t* idx = nullptr, int base = 10);
unsigned long long stoull(const string& str, size_t* idx = nullptr, int base = 10);
float stof (const string& str, size_t* idx = nullptr);
double stod (const string& str, size_t* idx = nullptr);
long double stold(const string& str, size_t* idx = nullptr);
string to_string(int val);
string to_string(unsigned val);
string to_string(long val);
string to_string(unsigned long val);
string to_string(long long val);
string to_string(unsigned long long val);
string to_string(float val);
string to_string(double val);
string to_string(long double val);
int stoi (const wstring& str, size_t* idx = nullptr, int base = 10);
long stol (const wstring& str, size_t* idx = nullptr, int base = 10);
unsigned long stoul (const wstring& str, size_t* idx = nullptr, int base = 10);
long long stoll (const wstring& str, size_t* idx = nullptr, int base = 10);
unsigned long long stoull(const wstring& str, size_t* idx = nullptr, int base = 10);
float stof (const wstring& str, size_t* idx = nullptr);
double stod (const wstring& str, size_t* idx = nullptr);
long double stold(const wstring& str, size_t* idx = nullptr);
wstring to_wstring(int val);
wstring to_wstring(unsigned val);
wstring to_wstring(long val);
wstring to_wstring(unsigned long val);
wstring to_wstring(long long val);
wstring to_wstring(unsigned long long val);
wstring to_wstring(float val);
wstring to_wstring(double val);
wstring to_wstring(long double val);
template <> struct hash<string>;
template <> struct hash<u8string>; // C++20
template <> struct hash<u16string>;
template <> struct hash<u32string>;
template <> struct hash<wstring>;
basic_string<char> operator "" s( const char *str, size_t len ); // C++14
basic_string<wchar_t> operator "" s( const wchar_t *str, size_t len ); // C++14
basic_string<char8_t> operator "" s( const char8_t *str, size_t len ); // C++20
basic_string<char16_t> operator "" s( const char16_t *str, size_t len ); // C++14
basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++14
} // std
*/
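// Illustrative usage only (not part of the header): a few of the operations
// declared in the synopsis above.
//   std::string s("abc");
//   s += 'd';                                       // s == "abcd"
//   bool found = s.find("bc") != std::string::npos; // true
//   std::string t = s.substr(1, 2);                 // t == "bc"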
#include <__config>
#include <__debug>
#include <__functional_base>
#include <__iterator/wrap_iter.h>
#include <algorithm>
#include <compare>
#include <cstdio> // EOF
+#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string_view>
#include <type_traits>
#include <utility>
#include <version>
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
# include <cstdint>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
// fpos
template <class _StateT>
class _LIBCPP_TEMPLATE_VIS fpos
{
private:
_StateT __st_;
streamoff __off_;
public:
_LIBCPP_INLINE_VISIBILITY fpos(streamoff __off = streamoff()) : __st_(), __off_(__off) {}
_LIBCPP_INLINE_VISIBILITY operator streamoff() const {return __off_;}
_LIBCPP_INLINE_VISIBILITY _StateT state() const {return __st_;}
_LIBCPP_INLINE_VISIBILITY void state(_StateT __st) {__st_ = __st;}
_LIBCPP_INLINE_VISIBILITY fpos& operator+=(streamoff __off) {__off_ += __off; return *this;}
_LIBCPP_INLINE_VISIBILITY fpos operator+ (streamoff __off) const {fpos __t(*this); __t += __off; return __t;}
_LIBCPP_INLINE_VISIBILITY fpos& operator-=(streamoff __off) {__off_ -= __off; return *this;}
_LIBCPP_INLINE_VISIBILITY fpos operator- (streamoff __off) const {fpos __t(*this); __t -= __off; return __t;}
};
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
streamoff operator-(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) - streamoff(__y);}
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
bool operator==(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) == streamoff(__y);}
template <class _StateT>
inline _LIBCPP_INLINE_VISIBILITY
bool operator!=(const fpos<_StateT>& __x, const fpos<_StateT>& __y)
{return streamoff(__x) != streamoff(__y);}
// basic_string
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x,
const basic_string<_CharT, _Traits, _Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __x, const basic_string<_CharT,_Traits,_Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __x, const basic_string<_CharT,_Traits,_Allocator>& __y);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, const _CharT* __y);
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, _CharT __y);
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
template <bool>
class _LIBCPP_TEMPLATE_VIS __basic_string_common
{
protected:
_LIBCPP_NORETURN void __throw_length_error() const;
_LIBCPP_NORETURN void __throw_out_of_range() const;
};
template <bool __b>
void
__basic_string_common<__b>::__throw_length_error() const
{
_VSTD::__throw_length_error("basic_string");
}
template <bool __b>
void
__basic_string_common<__b>::__throw_out_of_range() const
{
_VSTD::__throw_out_of_range("basic_string");
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __basic_string_common<true>)
template <class _Iter>
struct __string_is_trivial_iterator : public false_type {};
template <class _Tp>
struct __string_is_trivial_iterator<_Tp*>
: public is_arithmetic<_Tp> {};
template <class _Iter>
struct __string_is_trivial_iterator<__wrap_iter<_Iter> >
: public __string_is_trivial_iterator<_Iter> {};
template <class _CharT, class _Traits, class _Tp>
struct __can_be_converted_to_string_view : public _BoolConstant<
is_convertible<const _Tp&, basic_string_view<_CharT, _Traits> >::value &&
!is_convertible<const _Tp&, const _CharT*>::value
> {};
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
template <class _CharT, size_t = sizeof(_CharT)>
struct __padding
{
unsigned char __xx[sizeof(_CharT)-1];
};
template <class _CharT>
struct __padding<_CharT, 1>
{
};
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
#ifndef _LIBCPP_HAS_NO_CHAR8_T
typedef basic_string<char8_t> u8string;
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
template<class _CharT, class _Traits, class _Allocator>
class
_LIBCPP_TEMPLATE_VIS
#ifndef _LIBCPP_HAS_NO_CHAR8_T
_LIBCPP_PREFERRED_NAME(u8string)
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
_LIBCPP_PREFERRED_NAME(u16string)
_LIBCPP_PREFERRED_NAME(u32string)
#endif
basic_string
: private __basic_string_common<true>
{
public:
typedef basic_string __self;
typedef basic_string_view<_CharT, _Traits> __self_view;
typedef _Traits traits_type;
typedef _CharT value_type;
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
typedef typename __alloc_traits::difference_type difference_type;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef typename __alloc_traits::pointer pointer;
typedef typename __alloc_traits::const_pointer const_pointer;
static_assert((!is_array<value_type>::value), "Character type of basic_string must not be an array");
static_assert(( is_standard_layout<value_type>::value), "Character type of basic_string must be standard-layout");
static_assert(( is_trivial<value_type>::value), "Character type of basic_string must be trivial");
static_assert(( is_same<_CharT, typename traits_type::char_type>::value),
"traits_type::char_type must be the same type as CharT");
static_assert(( is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
typedef __wrap_iter<pointer> iterator;
typedef __wrap_iter<const_pointer> const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
private:
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
struct __long
{
pointer __data_;
size_type __size_;
size_type __cap_;
};
#ifdef _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x01;
static const size_type __long_mask = 0x1ul;
#else // _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x80;
static const size_type __long_mask = ~(size_type(~0) >> 1);
#endif // _LIBCPP_BIG_ENDIAN
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
struct __short
{
value_type __data_[__min_cap];
struct
: __padding<value_type>
{
unsigned char __size_;
};
};
#else
struct __long
{
size_type __cap_;
size_type __size_;
pointer __data_;
};
#ifdef _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x80;
static const size_type __long_mask = ~(size_type(~0) >> 1);
#else // _LIBCPP_BIG_ENDIAN
static const size_type __short_mask = 0x01;
static const size_type __long_mask = 0x1ul;
#endif // _LIBCPP_BIG_ENDIAN
enum {__min_cap = (sizeof(__long) - 1)/sizeof(value_type) > 2 ?
(sizeof(__long) - 1)/sizeof(value_type) : 2};
struct __short
{
union
{
unsigned char __size_;
value_type __lx;
};
value_type __data_[__min_cap];
};
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
union __ulx{__long __lx; __short __lxx;};
enum {__n_words = sizeof(__ulx) / sizeof(size_type)};
struct __raw
{
size_type __words[__n_words];
};
struct __rep
{
union
{
__long __l;
__short __s;
__raw __r;
};
};
__compressed_pair<__rep, allocator_type> __r_;
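// Worked example (an observation about the default, non-alternate layout on
// a 64-bit little-endian target, not normative): for basic_string<char>,
// sizeof(__long) == 24, so __min_cap == 23 and a short string stores up to
// 22 characters plus the null terminator entirely inside __r_; bit 0 of
// __s.__size_ (__short_mask == 0x01) is what flags the long representation.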
public:
_LIBCPP_TEMPLATE_DATA_VIS
static const size_type npos = -1;
_LIBCPP_INLINE_VISIBILITY basic_string()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY explicit basic_string(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
basic_string(const basic_string& __str);
basic_string(const basic_string& __str, const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string(basic_string&& __str)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
_LIBCPP_INLINE_VISIBILITY
basic_string(basic_string&& __str, const allocator_type& __a);
#endif // _LIBCPP_CXX03_LANG
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s) : __r_(__default_init_tag(), __default_init_tag()) {
_LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr");
__init(__s, traits_type::length(__s));
# if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
# endif
}
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, const _Allocator& __a);
#if _LIBCPP_STD_VER > 20
basic_string(nullptr_t) = delete;
#endif
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, size_type __n, const _Allocator& __a);
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c);
template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c, const _Allocator& __a);
basic_string(const basic_string& __str, size_type __pos, size_type __n,
const _Allocator& __a = _Allocator());
_LIBCPP_INLINE_VISIBILITY
basic_string(const basic_string& __str, size_type __pos,
const _Allocator& __a = _Allocator());
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
basic_string(const _Tp& __t, size_type __pos, size_type __n,
const allocator_type& __a = allocator_type());
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
!__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t);
template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t, const allocator_type& __a);
template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last);
template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last, const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string(initializer_list<_CharT> __il);
_LIBCPP_INLINE_VISIBILITY
basic_string(initializer_list<_CharT> __il, const _Allocator& __a);
#endif // _LIBCPP_CXX03_LANG
inline ~basic_string();
_LIBCPP_INLINE_VISIBILITY
operator __self_view() const _NOEXCEPT { return __self_view(data(), size()); }
basic_string& operator=(const basic_string& __str);
template <class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
basic_string& operator=(const _Tp& __t)
{__self_view __sv = __t; return assign(__sv);}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& operator=(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
basic_string& operator=(initializer_list<value_type> __il) {return assign(__il.begin(), __il.size());}
#endif
_LIBCPP_INLINE_VISIBILITY basic_string& operator=(const value_type* __s) {return assign(__s);}
#if _LIBCPP_STD_VER > 20
basic_string& operator=(nullptr_t) = delete;
#endif
basic_string& operator=(value_type __c);
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return iterator(this, __get_pointer());}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return const_iterator(this, __get_pointer());}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return iterator(this, __get_pointer() + size());}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return const_iterator(this, __get_pointer() + size());}
#else
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return iterator(__get_pointer());}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return const_iterator(__get_pointer());}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return iterator(__get_pointer() + size());}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return const_iterator(__get_pointer() + size());}
#endif // _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return begin();}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return end();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY size_type size() const _NOEXCEPT
{return __is_long() ? __get_long_size() : __get_short_size();}
_LIBCPP_INLINE_VISIBILITY size_type length() const _NOEXCEPT {return size();}
_LIBCPP_INLINE_VISIBILITY size_type max_size() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY size_type capacity() const _NOEXCEPT
{return (__is_long() ? __get_long_cap()
: static_cast<size_type>(__min_cap)) - 1;}
void resize(size_type __n, value_type __c);
_LIBCPP_INLINE_VISIBILITY void resize(size_type __n) {resize(__n, value_type());}
void reserve(size_type __requested_capacity);
_LIBCPP_INLINE_VISIBILITY void __resize_default_init(size_type __n);
_LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_INLINE_VISIBILITY
void reserve() _NOEXCEPT {shrink_to_fit();}
_LIBCPP_INLINE_VISIBILITY
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT;
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT {return size() == 0;}
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __pos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __pos) _NOEXCEPT;
const_reference at(size_type __n) const;
reference at(size_type __n);
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const basic_string& __str) {return append(__str);}
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string >::value,
basic_string&
>
operator+=(const _Tp& __t) {__self_view __sv = __t; return append(__sv);}
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(const value_type* __s) {return append(__s);}
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(value_type __c) {push_back(__c); return *this;}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY basic_string& operator+=(initializer_list<value_type> __il) {return append(__il);}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& append(const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
append(const _Tp& __t) { __self_view __sv = __t; return append(__sv.data(), __sv.size()); }
basic_string& append(const basic_string& __str, size_type __pos, size_type __n=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
append(const _Tp& __t, size_type __pos, size_type __n=npos);
basic_string& append(const value_type* __s, size_type __n);
basic_string& append(const value_type* __s);
basic_string& append(size_type __n, value_type __c);
_LIBCPP_INLINE_VISIBILITY
void __append_default_init(size_type __n);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
_LIBCPP_INLINE_VISIBILITY
append(_InputIterator __first, _InputIterator __last) {
const basic_string __temp(__first, __last, __alloc());
append(__temp.data(), __temp.size());
return *this;
}
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
>
_LIBCPP_INLINE_VISIBILITY
append(_ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& append(initializer_list<value_type> __il) {return append(__il.begin(), __il.size());}
#endif // _LIBCPP_CXX03_LANG
void push_back(value_type __c);
_LIBCPP_INLINE_VISIBILITY
void pop_back();
_LIBCPP_INLINE_VISIBILITY reference front() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference front() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference back() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference back() const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
assign(const _Tp & __t) { __self_view __sv = __t; return assign(__sv.data(), __sv.size()); }
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(const basic_string& __str) { return *this = __str; }
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{*this = _VSTD::move(__str); return *this;}
#endif
basic_string& assign(const basic_string& __str, size_type __pos, size_type __n=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
assign(const _Tp & __t, size_type __pos, size_type __n=npos);
basic_string& assign(const value_type* __s, size_type __n);
basic_string& assign(const value_type* __s);
basic_string& assign(size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
assign(_InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
>
assign(_ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& assign(initializer_list<value_type> __il) {return assign(__il.begin(), __il.size());}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& insert(size_type __pos1, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
insert(size_type __pos1, const _Tp& __t)
{ __self_view __sv = __t; return insert(__pos1, __sv.data(), __sv.size()); }
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
insert(size_type __pos1, const _Tp& __t, size_type __pos2, size_type __n=npos);
basic_string& insert(size_type __pos1, const basic_string& __str, size_type __pos2, size_type __n=npos);
basic_string& insert(size_type __pos, const value_type* __s, size_type __n);
basic_string& insert(size_type __pos, const value_type* __s);
basic_string& insert(size_type __pos, size_type __n, value_type __c);
iterator insert(const_iterator __pos, value_type __c);
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __pos, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
iterator
>
insert(const_iterator __pos, _InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
iterator
>
insert(const_iterator __pos, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __pos, initializer_list<value_type> __il)
{return insert(__pos, __il.begin(), __il.end());}
#endif // _LIBCPP_CXX03_LANG
basic_string& erase(size_type __pos = 0, size_type __n = npos);
_LIBCPP_INLINE_VISIBILITY
iterator erase(const_iterator __pos);
_LIBCPP_INLINE_VISIBILITY
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
replace(size_type __pos1, size_type __n1, const _Tp& __t) { __self_view __sv = __t; return replace(__pos1, __n1, __sv.data(), __sv.size()); }
basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
>
replace(size_type __pos1, size_type __n1, const _Tp& __t, size_type __pos2, size_type __n2=npos);
basic_string& replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2);
basic_string& replace(size_type __pos, size_type __n1, const value_type* __s);
basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const basic_string& __str);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
>
replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) { __self_view __sv = __t; return replace(__i1 - begin(), __i2 - __i1, __sv); }
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s);
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string&
>
replace(const_iterator __i1, const_iterator __i2, _InputIterator __j1, _InputIterator __j2);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
basic_string& replace(const_iterator __i1, const_iterator __i2, initializer_list<value_type> __il)
{return replace(__i1, __i2, __il.begin(), __il.end());}
#endif // _LIBCPP_CXX03_LANG
size_type copy(value_type* __s, size_type __n, size_type __pos = 0) const;
_LIBCPP_INLINE_VISIBILITY
basic_string substr(size_type __pos = 0, size_type __n = npos) const;
_LIBCPP_INLINE_VISIBILITY
void swap(basic_string& __str)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
_LIBCPP_INLINE_VISIBILITY
const value_type* c_str() const _NOEXCEPT {return data();}
_LIBCPP_INLINE_VISIBILITY
const value_type* data() const _NOEXCEPT {return _VSTD::__to_address(__get_pointer());}
#if _LIBCPP_STD_VER > 14 || defined(_LIBCPP_BUILDING_LIBRARY)
_LIBCPP_INLINE_VISIBILITY
value_type* data() _NOEXCEPT {return _VSTD::__to_address(__get_pointer());}
#endif
_LIBCPP_INLINE_VISIBILITY
allocator_type get_allocator() const _NOEXCEPT {return __alloc();}
_LIBCPP_INLINE_VISIBILITY
size_type find(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT;
size_type find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
size_type find(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type rfind(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
rfind(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type rfind(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type rfind(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
size_type rfind(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_first_of(const _Tp& __t, size_type __pos = 0) const _NOEXCEPT;
size_type find_first_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_of(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_last_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type find_last_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_of(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(const basic_string& __str, size_type __pos = 0) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_first_not_of(const _Tp &__t, size_type __pos = 0) const _NOEXCEPT;
size_type find_first_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(const value_type* __s, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_first_not_of(value_type __c, size_type __pos = 0) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(const basic_string& __str, size_type __pos = npos) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
>
find_last_not_of(const _Tp& __t, size_type __pos = npos) const _NOEXCEPT;
size_type find_last_not_of(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(const value_type* __s, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type find_last_not_of(value_type __c, size_type __pos = npos) const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
int compare(const basic_string& __str) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
compare(const _Tp &__t) const _NOEXCEPT;
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
compare(size_type __pos1, size_type __n1, const _Tp& __t) const;
_LIBCPP_INLINE_VISIBILITY
int compare(size_type __pos1, size_type __n1, const basic_string& __str) const;
int compare(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos) const;
template <class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
int
>
compare(size_type __pos1, size_type __n1, const _Tp& __t, size_type __pos2, size_type __n2=npos) const;
int compare(const value_type* __s) const _NOEXCEPT;
int compare(size_type __pos1, size_type __n1, const value_type* __s) const;
int compare(size_type __pos1, size_type __n1, const value_type* __s, size_type __n2) const;
#if _LIBCPP_STD_VER > 17
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(__self_view __sv) const _NOEXCEPT
{ return __self_view(data(), size()).starts_with(__sv); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(value_type __c) const _NOEXCEPT
{ return !empty() && _Traits::eq(front(), __c); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool starts_with(const value_type* __s) const _NOEXCEPT
{ return starts_with(__self_view(__s)); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(__self_view __sv) const _NOEXCEPT
{ return __self_view(data(), size()).ends_with( __sv); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(value_type __c) const _NOEXCEPT
{ return !empty() && _Traits::eq(back(), __c); }
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool ends_with(const value_type* __s) const _NOEXCEPT
{ return ends_with(__self_view(__s)); }
#endif
#if _LIBCPP_STD_VER > 20
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(__self_view __sv) const noexcept
{ return __self_view(data(), size()).contains(__sv); }
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(value_type __c) const noexcept
{ return __self_view(data(), size()).contains(__c); }
constexpr _LIBCPP_INLINE_VISIBILITY
bool contains(const value_type* __s) const
{ return __self_view(data(), size()).contains(__s); }
#endif
_LIBCPP_INLINE_VISIBILITY bool __invariants() const;
_LIBCPP_INLINE_VISIBILITY void __clear_and_shrink() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY void __shrink_or_extend(size_type __target_capacity);
_LIBCPP_INLINE_VISIBILITY
bool __is_long() const _NOEXCEPT
{return bool(__r_.first().__s.__size_ & __short_mask);}
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const;
bool __decrementable(const const_iterator* __i) const;
bool __addable(const const_iterator* __i, ptrdiff_t __n) const;
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const;
#endif // _LIBCPP_DEBUG_LEVEL == 2
private:
_LIBCPP_INLINE_VISIBILITY
allocator_type& __alloc() _NOEXCEPT
{return __r_.second();}
_LIBCPP_INLINE_VISIBILITY
const allocator_type& __alloc() const _NOEXCEPT
{return __r_.second();}
#ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_short_size(size_type __s) _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
# else
{__r_.first().__s.__size_ = (unsigned char)(__s);}
# endif
_LIBCPP_INLINE_VISIBILITY
size_type __get_short_size() const _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{return __r_.first().__s.__size_ >> 1;}
# else
{return __r_.first().__s.__size_;}
# endif
#else // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_short_size(size_type __s) _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{__r_.first().__s.__size_ = (unsigned char)(__s);}
# else
{__r_.first().__s.__size_ = (unsigned char)(__s << 1);}
# endif
_LIBCPP_INLINE_VISIBILITY
size_type __get_short_size() const _NOEXCEPT
# ifdef _LIBCPP_BIG_ENDIAN
{return __r_.first().__s.__size_;}
# else
{return __r_.first().__s.__size_ >> 1;}
# endif
#endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
_LIBCPP_INLINE_VISIBILITY
void __set_long_size(size_type __s) _NOEXCEPT
{__r_.first().__l.__size_ = __s;}
_LIBCPP_INLINE_VISIBILITY
size_type __get_long_size() const _NOEXCEPT
{return __r_.first().__l.__size_;}
_LIBCPP_INLINE_VISIBILITY
void __set_size(size_type __s) _NOEXCEPT
{if (__is_long()) __set_long_size(__s); else __set_short_size(__s);}
_LIBCPP_INLINE_VISIBILITY
void __set_long_cap(size_type __s) _NOEXCEPT
{__r_.first().__l.__cap_ = __long_mask | __s;}
_LIBCPP_INLINE_VISIBILITY
size_type __get_long_cap() const _NOEXCEPT
{return __r_.first().__l.__cap_ & size_type(~__long_mask);}
_LIBCPP_INLINE_VISIBILITY
void __set_long_pointer(pointer __p) _NOEXCEPT
{__r_.first().__l.__data_ = __p;}
_LIBCPP_INLINE_VISIBILITY
pointer __get_long_pointer() _NOEXCEPT
{return __r_.first().__l.__data_;}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_long_pointer() const _NOEXCEPT
{return __r_.first().__l.__data_;}
_LIBCPP_INLINE_VISIBILITY
pointer __get_short_pointer() _NOEXCEPT
{return pointer_traits<pointer>::pointer_to(__r_.first().__s.__data_[0]);}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_short_pointer() const _NOEXCEPT
{return pointer_traits<const_pointer>::pointer_to(__r_.first().__s.__data_[0]);}
_LIBCPP_INLINE_VISIBILITY
pointer __get_pointer() _NOEXCEPT
{return __is_long() ? __get_long_pointer() : __get_short_pointer();}
_LIBCPP_INLINE_VISIBILITY
const_pointer __get_pointer() const _NOEXCEPT
{return __is_long() ? __get_long_pointer() : __get_short_pointer();}
_LIBCPP_INLINE_VISIBILITY
void __zero() _NOEXCEPT
{
size_type (&__a)[__n_words] = __r_.first().__r.__words;
for (unsigned __i = 0; __i < __n_words; ++__i)
__a[__i] = 0;
}
template <size_type __a> static
_LIBCPP_INLINE_VISIBILITY
size_type __align_it(size_type __s) _NOEXCEPT
{return (__s + (__a-1)) & ~(__a-1);}
enum {__alignment = 16};
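// __recommend() maps a requested size onto the capacity that will actually be
// used: requests below __min_cap stay in the short (SSO) buffer, and longer
// requests are rounded up with __align_it so that capacity + 1 (including the
// null terminator) fills a whole multiple of __alignment bytes when
// sizeof(value_type) < __alignment. The result deliberately skips __min_cap
// itself, presumably so a long capacity can never be confused with the
// short-buffer capacity.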
static _LIBCPP_INLINE_VISIBILITY
size_type __recommend(size_type __s) _NOEXCEPT
{
if (__s < __min_cap) return static_cast<size_type>(__min_cap) - 1;
size_type __guess = __align_it<sizeof(value_type) < __alignment ?
__alignment/sizeof(value_type) : 1 > (__s+1) - 1;
if (__guess == __min_cap) ++__guess;
return __guess;
}
inline
void __init(const value_type* __s, size_type __sz, size_type __reserve);
inline
void __init(const value_type* __s, size_type __sz);
inline
void __init(size_type __n, value_type __c);
// Slow path for the (inlined) copy constructor for 'long' strings.
// Always externally instantiated and not inlined.
// Requires that __s is zero terminated.
// The main reason this function exists is that, for the unstable ABI, we
// want to allow inlining of the copy constructor. However, we don't want
// to call the __init() functions, as those are marked inline, which may
// result in over-aggressive inlining by the compiler; our aim is
// to inline only the fast path code directly in the ctor.
void __init_copy_ctor_external(const value_type* __s, size_type __sz);
template <class _InputIterator>
inline
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
__init(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
inline
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
__init(_ForwardIterator __first, _ForwardIterator __last);
void __grow_by(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add = 0);
void __grow_by_and_replace(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del,
size_type __n_add, const value_type* __p_new_stuff);
// __assign_no_alias is invoked for assignment operations where we
// have proof that the input does not alias the current instance.
// For example, operator=(basic_string) performs a 'self' check.
template <bool __is_short>
basic_string& __assign_no_alias(const value_type* __s, size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __erase_to_end(size_type __pos);
// __erase_external_with_move is invoked for erase() invocations where
// `__n != npos`, likely requiring memory moves on the string data.
void __erase_external_with_move(size_type __pos, size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string& __str)
{__copy_assign_alloc(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string& __str, true_type)
{
if (__alloc() == __str.__alloc())
__alloc() = __str.__alloc();
else
{
if (!__str.__is_long())
{
__clear_and_shrink();
__alloc() = __str.__alloc();
}
else
{
allocator_type __a = __str.__alloc();
pointer __p = __alloc_traits::allocate(__a, __str.__get_long_cap());
__clear_and_shrink();
__alloc() = _VSTD::move(__a);
__set_long_pointer(__p);
__set_long_cap(__str.__get_long_cap());
__set_long_size(__str.size());
}
}
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const basic_string&, false_type) _NOEXCEPT
{}
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void __move_assign(basic_string& __str, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value);
_LIBCPP_INLINE_VISIBILITY
void __move_assign(basic_string& __str, true_type)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
#endif
#endif
_LIBCPP_INLINE_VISIBILITY
void
__move_assign_alloc(basic_string& __str)
_NOEXCEPT_(
!__alloc_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(basic_string& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(basic_string&, false_type)
_NOEXCEPT
{}
basic_string& __assign_external(const value_type* __s);
basic_string& __assign_external(const value_type* __s, size_type __n);
// Assigns the value in __s, which is guaranteed to have length __n < __min_cap.
inline basic_string& __assign_short(const value_type* __s, size_type __n) {
pointer __p = __is_long()
? (__set_long_size(__n), __get_long_pointer())
: (__set_short_size(__n), __get_short_pointer());
traits_type::move(_VSTD::__to_address(__p), __s, __n);
traits_type::assign(__p[__n], value_type());
return *this;
}
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
_LIBCPP_INLINE_VISIBILITY void __invalidate_iterators_past(size_type);
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY
bool __addr_in_range(_Tp&& __t) const {
const volatile void *__p = _VSTD::addressof(__t);
return data() <= __p && __p <= data() + size();
}
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
friend basic_string operator+<>(const basic_string&, const basic_string&);
friend basic_string operator+<>(const value_type*, const basic_string&);
friend basic_string operator+<>(value_type, const basic_string&);
friend basic_string operator+<>(const basic_string&, const value_type*);
friend basic_string operator+<>(const basic_string&, value_type);
};
// These declarations must appear before any functions are implicitly used
// so that they have the correct visibility specifier.
#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
#else
_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
#endif
#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
template<class _InputIterator,
class _CharT = __iter_value_type<_InputIterator>,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value>,
class = _EnableIf<__is_allocator<_Allocator>::value>
>
basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator())
-> basic_string<_CharT, char_traits<_CharT>, _Allocator>;
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_allocator<_Allocator>::value>
>
explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _Allocator())
-> basic_string<_CharT, _Traits, _Allocator>;
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
class = _EnableIf<__is_allocator<_Allocator>::value>,
class _Sz = typename allocator_traits<_Allocator>::size_type
>
basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _Allocator())
-> basic_string<_CharT, _Traits, _Allocator>;
#endif
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__invalidate_all_iterators()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__invalidate_all(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__invalidate_iterators_past(size_type __pos)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__c_node* __c = __get_db()->__find_c_and_lock(this);
if (__c)
{
const_pointer __new_last = __get_pointer() + __pos;
for (__i_node** __p = __c->end_; __p != __c->beg_; )
{
--__p;
const_iterator* __i = static_cast<const_iterator*>((*__p)->__i_);
if (__i->base() > __new_last)
{
(*__p)->__c_ = nullptr;
if (--__c->end_ != __p)
_VSTD::memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*));
}
}
__get_db()->unlock();
}
#else
(void)__pos;
#endif // _LIBCPP_DEBUG_LEVEL == 2
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __r_(__default_init_tag(), __default_init_tag())
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
__zero();
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __r_(__default_init_tag(), __a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
__zero();
}
template <class _CharT, class _Traits, class _Allocator>
void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s,
size_type __sz,
size_type __reserve)
{
if (__reserve > max_size())
this->__throw_length_error();
pointer __p;
if (__reserve < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__reserve);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
traits_type::assign(__p[__sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz)
{
if (__sz > max_size())
this->__throw_length_error();
pointer __p;
if (__sz < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz);
traits_type::assign(__p[__sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
template <class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
_LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*, allocator) detected nullptr");
__init(__s, traits_type::length(__s));
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_type __n)
: __r_(__default_init_tag(), __default_init_tag())
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n) detected nullptr");
__init(__s, __n);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _CharT* __s, size_type __n, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "basic_string(const char*, n, allocator) detected nullptr");
__init(__s, __n);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str)
: __r_(__default_init_tag(), __alloc_traits::select_on_container_copy_construction(__str.__alloc()))
{
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
__str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(
const basic_string& __str, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
__init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
__str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
const value_type* __s, size_type __sz) {
pointer __p;
if (__sz < __min_cap) {
__p = __get_short_pointer();
__set_short_size(__sz);
} else {
if (__sz > max_size())
this->__throw_length_error();
size_t __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap + 1);
__set_long_pointer(__p);
__set_long_cap(__cap + 1);
__set_long_size(__sz);
}
traits_type::copy(_VSTD::__to_address(__p), __s, __sz + 1);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __r_(_VSTD::move(__str.__r_))
{
__str.__zero();
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
if (__is_long())
__get_db()->swap(this, &__str);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
if (__str.__is_long() && __a != __str.__alloc()) // copy, not move
__init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
else
{
__r_.first().__r = __str.__r_.first().__r;
__str.__zero();
}
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
if (__is_long())
__get_db()->swap(this, &__str);
#endif
}
#endif // _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c)
{
if (__n > max_size())
this->__throw_length_error();
pointer __p;
if (__n < __min_cap)
{
__set_short_size(__n);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__n);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__n);
}
traits_type::assign(_VSTD::__to_address(__p), __n, __c);
traits_type::assign(__p[__n], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __c)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__n, __c);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(size_type __n, _CharT __c, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__init(__n, __c);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str,
size_type __pos, size_type __n,
const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
size_type __str_sz = __str.size();
if (__pos > __str_sz)
this->__throw_out_of_range();
__init(__str.data() + __pos, _VSTD::min(__n, __str_sz - __pos));
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __str, size_type __pos,
const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
size_type __str_sz = __str.size();
if (__pos > __str_sz)
this->__throw_out_of_range();
__init(__str.data() + __pos, __str_sz - __pos);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(
const _Tp& __t, size_type __pos, size_type __n, const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
__self_view __sv0 = __t;
__self_view __sv = __sv0.substr(__pos, __n);
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t)
: __r_(__default_init_tag(), __default_init_tag())
{
__self_view __sv = __t;
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp, class>
basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__self_view __sv = __t;
__init(__sv.data(), __sv.size());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template <class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
basic_string<_CharT, _Traits, _Allocator>::__init(_InputIterator __first, _InputIterator __last)
{
__zero();
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _CharT, class _Traits, class _Allocator>
template <class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
basic_string<_CharT, _Traits, _Allocator>::__init(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __sz = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__sz > max_size())
this->__throw_length_error();
pointer __p;
if (__sz < __min_cap)
{
__set_short_size(__sz);
__p = __get_short_pointer();
}
else
{
size_type __cap = __recommend(__sz);
__p = __alloc_traits::allocate(__alloc(), __cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__set_long_size(__sz);
}
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first, (void) ++__p)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator, class>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, _InputIterator __last)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__first, __last);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator, class>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(_InputIterator __first, _InputIterator __last,
const allocator_type& __a)
: __r_(__default_init_tag(), __a)
{
__init(__first, __last);
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(
initializer_list<_CharT> __il)
: __r_(__default_init_tag(), __default_init_tag())
{
__init(__il.begin(), __il.end());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(
initializer_list<_CharT> __il, const _Allocator& __a)
: __r_(__default_init_tag(), __a)
{
__init(__il.begin(), __il.end());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
#endif // _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>::~basic_string()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__erase_c(this);
#endif
if (__is_long())
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap());
}
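// __grow_by_and_replace() is the common slow path for append/insert/replace
// when the current capacity is insufficient. It allocates a larger buffer
// (roughly max(old + delta, 2 * old), clamped near max_size()), copies the
// first __n_copy characters of the old string, writes __n_add characters from
// __p_new_stuff, skips __n_del old characters, copies the remaining tail,
// frees the old long buffer, and switches *this to the long representation.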
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__grow_by_and_replace
(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add, const value_type* __p_new_stuff)
{
size_type __ms = max_size();
if (__delta_cap > __ms - __old_cap - 1)
this->__throw_length_error();
pointer __old_p = __get_pointer();
size_type __cap = __old_cap < __ms / 2 - __alignment ?
__recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
__ms - 1;
pointer __p = __alloc_traits::allocate(__alloc(), __cap+1);
__invalidate_all_iterators();
if (__n_copy != 0)
traits_type::copy(_VSTD::__to_address(__p),
_VSTD::__to_address(__old_p), __n_copy);
if (__n_add != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy, __p_new_stuff, __n_add);
size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
if (__sec_cp_sz != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy + __n_add,
_VSTD::__to_address(__old_p) + __n_copy + __n_del, __sec_cp_sz);
if (__old_cap+1 != __min_cap)
__alloc_traits::deallocate(__alloc(), __old_p, __old_cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
__old_sz = __n_copy + __n_add + __sec_cp_sz;
__set_long_size(__old_sz);
traits_type::assign(__p[__old_sz], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__grow_by(size_type __old_cap, size_type __delta_cap, size_type __old_sz,
size_type __n_copy, size_type __n_del, size_type __n_add)
{
size_type __ms = max_size();
if (__delta_cap > __ms - __old_cap)
this->__throw_length_error();
pointer __old_p = __get_pointer();
size_type __cap = __old_cap < __ms / 2 - __alignment ?
__recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
__ms - 1;
pointer __p = __alloc_traits::allocate(__alloc(), __cap+1);
__invalidate_all_iterators();
if (__n_copy != 0)
traits_type::copy(_VSTD::__to_address(__p),
_VSTD::__to_address(__old_p), __n_copy);
size_type __sec_cp_sz = __old_sz - __n_del - __n_copy;
if (__sec_cp_sz != 0)
traits_type::copy(_VSTD::__to_address(__p) + __n_copy + __n_add,
_VSTD::__to_address(__old_p) + __n_copy + __n_del,
__sec_cp_sz);
if (__old_cap+1 != __min_cap)
__alloc_traits::deallocate(__alloc(), __old_p, __old_cap+1);
__set_long_pointer(__p);
__set_long_cap(__cap+1);
}
// assign
template <class _CharT, class _Traits, class _Allocator>
template <bool __is_short>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
const value_type* __s, size_type __n) {
size_type __cap = __is_short ? __min_cap : __get_long_cap();
if (__n < __cap) {
pointer __p = __is_short ? __get_short_pointer() : __get_long_pointer();
__is_short ? __set_short_size(__n) : __set_long_size(__n);
traits_type::copy(_VSTD::__to_address(__p), __s, __n);
traits_type::assign(__p[__n], value_type());
__invalidate_iterators_past(__n);
} else {
size_type __sz = __is_short ? __get_short_size() : __get_long_size();
__grow_by_and_replace(__cap - 1, __n - __cap + 1, __sz, 0, __sz, __n, __s);
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_external(
const value_type* __s, size_type __n) {
size_type __cap = capacity();
if (__cap >= __n) {
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::move(__p, __s, __n);
traits_type::assign(__p[__n], value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
} else {
size_type __sz = size();
__grow_by_and_replace(__cap, __n - __cap, __sz, 0, __sz, __n, __s);
}
return *this;
}
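// assign(const value_type*, size_type) has a small-size fast path: when __n is
// a compile-time constant (per _LIBCPP_BUILTIN_CONSTANT_P) that fits in the
// short buffer, the inlined __assign_short() is used; otherwise the call goes
// to the externally instantiated __assign_external().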
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::assign received nullptr");
return (_LIBCPP_BUILTIN_CONSTANT_P(__n) && __n < __min_cap)
? __assign_short(__s, __n)
: __assign_external(__s, __n);
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(size_type __n, value_type __c)
{
size_type __cap = capacity();
if (__cap < __n)
{
size_type __sz = size();
__grow_by(__cap, __n - __cap, __sz, 0, __sz);
}
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::assign(__p, __n, __c);
traits_type::assign(__p[__n], value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(value_type __c)
{
pointer __p;
if (__is_long())
{
__p = __get_long_pointer();
__set_long_size(1);
}
else
{
__p = __get_short_pointer();
__set_short_size(1);
}
traits_type::assign(*__p, __c);
traits_type::assign(*++__p, value_type());
__invalidate_iterators_past(1);
return *this;
}
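// Copy assignment: the short/short case is a plain representation copy; the
// other cases funnel into __assign_no_alias, whose template argument describes
// the current representation of *this* (short or long), not of __str. The
// self-assignment check above is what establishes the "no alias" precondition.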
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(const basic_string& __str)
{
if (this != &__str) {
__copy_assign_alloc(__str);
if (!__is_long()) {
if (!__str.__is_long()) {
__r_.first().__r = __str.__r_.first().__r;
} else {
return __assign_no_alias<true>(__str.data(), __str.size());
}
} else {
return __assign_no_alias<false>(__str.data(), __str.size());
}
}
return *this;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value)
{
if (__alloc() != __str.__alloc())
assign(__str);
else
__move_assign(__str, true_type());
}
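// Move assignment when the allocator propagates on move assignment (or the
// allocators compare equal): steal __str's representation wholesale and leave
// __str as a valid empty short string. Before C++17 the allocator's move
// assignment may throw, so *this is first reset to an empty short string so
// it is not left pointing at the just-freed long buffer if
// __move_assign_alloc() throws.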
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, true_type)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
#endif
{
if (__is_long()) {
__alloc_traits::deallocate(__alloc(), __get_long_pointer(),
__get_long_cap());
#if _LIBCPP_STD_VER <= 14
if (!is_nothrow_move_assignable<allocator_type>::value) {
__set_short_size(0);
traits_type::assign(__get_short_pointer()[0], value_type());
}
#endif
}
__move_assign_alloc(__str);
__r_.first() = __str.__r_.first();
__str.__set_short_size(0);
traits_type::assign(__str.__get_short_pointer()[0], value_type());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__str, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());
return *this;
}
#endif
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
const basic_string __temp(__first, __last, __alloc());
assign(__temp.data(), __temp.size());
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __cap = capacity();
size_type __n = __string_is_trivial_iterator<_ForwardIterator>::value ?
static_cast<size_type>(_VSTD::distance(__first, __last)) : 0;
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
(__cap >= __n || !__addr_in_range(*__first)))
{
if (__cap < __n)
{
size_type __sz = size();
__grow_by(__cap, __n - __cap, __sz, 0, __sz);
}
pointer __p = __get_pointer();
for (; __first != __last; ++__first, ++__p)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__n);
__invalidate_iterators_past(__n);
}
else
{
const basic_string __temp(__first, __last, __alloc());
assign(__temp.data(), __temp.size());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const basic_string& __str, size_type __pos, size_type __n)
{
size_type __sz = __str.size();
if (__pos > __sz)
this->__throw_out_of_range();
return assign(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp & __t, size_type __pos, size_type __n)
{
__self_view __sv = __t;
size_type __sz = __sv.size();
if (__pos > __sz)
this->__throw_out_of_range();
return assign(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::__assign_external(const value_type* __s) {
return __assign_external(__s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::assign received nullptr");
return _LIBCPP_BUILTIN_CONSTANT_P(*__s)
? (traits_type::length(__s) < __min_cap
? __assign_short(__s, traits_type::length(__s))
: __assign_external(__s, traits_type::length(__s)))
: __assign_external(__s);
}
// append
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::append received nullptr");
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz >= __n)
{
if (__n)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
traits_type::copy(__p + __sz, __s, __n);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
else
__grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __sz, 0, __n, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(size_type __n, value_type __c)
{
if (__n)
{
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer();
traits_type::assign(_VSTD::__to_address(__p) + __sz, __n, __c);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline void
basic_string<_CharT, _Traits, _Allocator>::__append_default_init(size_type __n)
{
if (__n)
{
size_type __cap = capacity();
size_type __sz = size();
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer();
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
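// push_back() reads the current size/capacity from whichever representation is
// active, grows by exactly one character via __grow_by() if the string is full
// (which may flip it to the long form), then writes the character and
// re-terminates.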
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::push_back(value_type __c)
{
bool __is_short = !__is_long();
size_type __cap;
size_type __sz;
if (__is_short)
{
__cap = __min_cap - 1;
__sz = __get_short_size();
}
else
{
__cap = __get_long_cap() - 1;
__sz = __get_long_size();
}
if (__sz == __cap)
{
__grow_by(__cap, 1, __sz, __sz, 0);
__is_short = !__is_long();
}
pointer __p;
if (__is_short)
{
__p = __get_short_pointer() + __sz;
__set_short_size(__sz+1);
}
else
{
__p = __get_long_pointer() + __sz;
__set_long_size(__sz+1);
}
traits_type::assign(*__p, __c);
traits_type::assign(*++__p, value_type());
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::append(
_ForwardIterator __first, _ForwardIterator __last)
{
size_type __sz = size();
size_type __cap = capacity();
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n)
{
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
!__addr_in_range(*__first))
{
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer() + __sz;
for (; __first != __last; ++__p, ++__first)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__sz + __n);
}
else
{
const basic_string __temp(__first, __last, __alloc());
append(__temp.data(), __temp.size());
}
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str)
{
return append(__str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str, size_type __pos, size_type __n)
{
size_type __sz = __str.size();
if (__pos > __sz)
this->__throw_out_of_range();
return append(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::append(const _Tp & __t, size_type __pos, size_type __n)
{
__self_view __sv = __t;
size_type __sz = __sv.size();
if (__pos > __sz)
this->__throw_out_of_range();
return append(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::append(const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::append received nullptr");
return append(__s, traits_type::length(__s));
}
// insert
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::insert received nullptr");
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
size_type __cap = capacity();
if (__cap - __sz >= __n)
{
if (__n)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __pos;
if (__n_move != 0)
{
if (__p + __pos <= __s && __s < __p + __sz)
__s += __n;
traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
}
traits_type::move(__p + __pos, __s, __n);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
else
__grow_by_and_replace(__cap, __sz + __n - __cap, __sz, __pos, 0, __n, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n, value_type __c)
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
if (__n)
{
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz >= __n)
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __pos;
if (__n_move != 0)
traits_type::move(__p + __pos + __n, __p + __pos, __n_move);
}
else
{
__grow_by(__cap, __sz + __n - __cap, __sz, __pos, 0, __n);
__p = _VSTD::__to_address(__get_long_pointer());
}
traits_type::assign(__p + __pos, __n, __c);
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
>
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _InputIterator __first, _InputIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, range) called with an iterator not"
" referring to this string");
#endif
const basic_string __temp(__first, __last, __alloc());
return insert(__pos, __temp.data(), __temp.data() + __temp.size());
}
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
_EnableIf
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
>
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _ForwardIterator __first, _ForwardIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, range) called with an iterator not"
" referring to this string");
#endif
size_type __ip = static_cast<size_type>(__pos - begin());
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n)
{
if (__string_is_trivial_iterator<_ForwardIterator>::value &&
!__addr_in_range(*__first))
{
size_type __sz = size();
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz >= __n)
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __ip;
if (__n_move != 0)
traits_type::move(__p + __ip + __n, __p + __ip, __n_move);
}
else
{
__grow_by(__cap, __sz + __n - __cap, __sz, __ip, 0, __n);
__p = _VSTD::__to_address(__get_long_pointer());
}
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
for (__p += __ip; __first != __last; ++__p, ++__first)
traits_type::assign(*__p, *__first);
}
else
{
const basic_string __temp(__first, __last, __alloc());
return insert(__pos, __temp.data(), __temp.data() + __temp.size());
}
}
return begin() + __ip;
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str)
{
return insert(__pos1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str,
size_type __pos2, size_type __n)
{
size_type __str_sz = __str.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return insert(__pos1, __str.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const _Tp& __t,
size_type __pos2, size_type __n)
{
__self_view __sv = __t;
size_type __str_sz = __sv.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return insert(__pos1, __sv.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::insert received nullptr");
return insert(__pos, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, value_type __c)
{
size_type __ip = static_cast<size_type>(__pos - begin());
size_type __sz = size();
size_type __cap = capacity();
value_type* __p;
if (__cap == __sz)
{
__grow_by(__cap, 1, __sz, __ip, 0, 1);
__p = _VSTD::__to_address(__get_long_pointer());
}
else
{
__p = _VSTD::__to_address(__get_pointer());
size_type __n_move = __sz - __ip;
if (__n_move != 0)
traits_type::move(__p + __ip + 1, __p + __ip, __n_move);
}
traits_type::assign(__p[__ip], __c);
traits_type::assign(__p[++__sz], value_type());
__set_size(__sz);
return begin() + static_cast<difference_type>(__ip);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, size_type __n, value_type __c)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::insert(iterator, n, value) called with an iterator not"
" referring to this string");
#endif
difference_type __p = __pos - begin();
insert(static_cast<size_type>(__p), __n, __c);
return begin() + __p;
}
// replace
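// This overload does the replacement in place whenever the existing capacity
// can absorb the size change. Because __s may point into this string's own
// buffer, the code either copies the replacement before shifting the tail
// (when shrinking), or adjusts/splits the copy from __s (when growing), so
// that the traits_type::move of the tail cannot clobber the source characters.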
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
_LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::replace received nullptr");
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
__n1 = _VSTD::min(__n1, __sz - __pos);
size_type __cap = capacity();
if (__cap - __sz + __n1 >= __n2)
{
value_type* __p = _VSTD::__to_address(__get_pointer());
if (__n1 != __n2)
{
size_type __n_move = __sz - __pos - __n1;
if (__n_move != 0)
{
if (__n1 > __n2)
{
traits_type::move(__p + __pos, __s, __n2);
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
goto __finish;
}
if (__p + __pos < __s && __s < __p + __sz)
{
if (__p + __pos + __n1 <= __s)
__s += __n2 - __n1;
else // __p + __pos < __s < __p + __pos + __n1
{
traits_type::move(__p + __pos, __s, __n1);
__pos += __n1;
__s += __n2;
__n2 -= __n1;
__n1 = 0;
}
}
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
}
}
traits_type::move(__p + __pos, __s, __n2);
__finish:
// `__sz += __n2 - __n1;` in this function and the one below can cause unsigned
// integer overflow, but this is a safe operation, so we disable the check.
__sz += __n2 - __n1;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
}
else
__grow_by_and_replace(__cap, __sz - __n1 + __n2 - __cap, __sz, __pos, __n1, __n2, __s);
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, size_type __n2, value_type __c)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
__n1 = _VSTD::min(__n1, __sz - __pos);
size_type __cap = capacity();
value_type* __p;
if (__cap - __sz + __n1 >= __n2)
{
__p = _VSTD::__to_address(__get_pointer());
if (__n1 != __n2)
{
size_type __n_move = __sz - __pos - __n1;
if (__n_move != 0)
traits_type::move(__p + __pos + __n2, __p + __pos + __n1, __n_move);
}
}
else
{
__grow_by(__cap, __sz - __n1 + __n2 - __cap, __sz, __pos, __n1, __n2);
__p = _VSTD::__to_address(__get_long_pointer());
}
traits_type::assign(__p + __pos, __n2, __c);
__sz += __n2 - __n1;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
_EnableIf
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2,
_InputIterator __j1, _InputIterator __j2)
{
const basic_string __temp(__j1, __j2, __alloc());
return this->replace(__i1, __i2, __temp);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str)
{
return replace(__pos1, __n1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str,
size_type __pos2, size_type __n2)
{
size_type __str_sz = __str.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return replace(__pos1, __n1, __str.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
>
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const _Tp& __t,
size_type __pos2, size_type __n2)
{
__self_view __sv = __t;
size_type __str_sz = __sv.size();
if (__pos2 > __str_sz)
this->__throw_out_of_range();
return replace(__pos1, __n1, __sv.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __n1, const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::replace received nullptr");
return replace(__pos, __n1, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const basic_string& __str)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1),
__str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __s, __n);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __s);
}
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c)
{
return replace(static_cast<size_type>(__i1 - begin()), static_cast<size_type>(__i2 - __i1), __n, __c);
}
// erase
// 'externally instantiated' erase() implementation, called when __n != npos.
// Does not check __pos against size()
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(
size_type __pos, size_type __n)
{
if (__n)
{
size_type __sz = size();
value_type* __p = _VSTD::__to_address(__get_pointer());
__n = _VSTD::min(__n, __sz - __pos);
size_type __n_move = __sz - __pos - __n;
if (__n_move != 0)
traits_type::move(__p + __pos, __p + __pos + __n, __n_move);
__sz -= __n;
__set_size(__sz);
__invalidate_iterators_past(__sz);
traits_type::assign(__p[__sz], value_type());
}
}
template <class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::erase(size_type __pos,
size_type __n) {
if (__pos > size()) this->__throw_out_of_range();
if (__n == npos) {
__erase_to_end(__pos);
} else {
__erase_external_with_move(__pos, __n);
}
return *this;
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __pos)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this,
"string::erase(iterator) called with an iterator not"
" referring to this string");
#endif
_LIBCPP_ASSERT(__pos != end(),
"string::erase(iterator) called with a non-dereferenceable iterator");
iterator __b = begin();
size_type __r = static_cast<size_type>(__pos - __b);
erase(__r, 1);
return __b + static_cast<difference_type>(__r);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::iterator
basic_string<_CharT, _Traits, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
"string::erase(iterator, iterator) called with an iterator not"
" referring to this string");
#endif
_LIBCPP_ASSERT(__first <= __last, "string::erase(first, last) called with invalid range");
iterator __b = begin();
size_type __r = static_cast<size_type>(__first - __b);
erase(__r, static_cast<size_type>(__last - __first));
return __b + static_cast<difference_type>(__r);
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::pop_back()
{
_LIBCPP_ASSERT(!empty(), "string::pop_back(): string is already empty");
size_type __sz;
if (__is_long())
{
__sz = __get_long_size() - 1;
__set_long_size(__sz);
traits_type::assign(*(__get_long_pointer() + __sz), value_type());
}
else
{
__sz = __get_short_size() - 1;
__set_short_size(__sz);
traits_type::assign(*(__get_short_pointer() + __sz), value_type());
}
__invalidate_iterators_past(__sz);
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::clear() _NOEXCEPT
{
__invalidate_all_iterators();
if (__is_long())
{
traits_type::assign(*__get_long_pointer(), value_type());
__set_long_size(0);
}
else
{
traits_type::assign(*__get_short_pointer(), value_type());
__set_short_size(0);
}
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__erase_to_end(size_type __pos)
{
if (__is_long())
{
traits_type::assign(*(__get_long_pointer() + __pos), value_type());
__set_long_size(__pos);
}
else
{
traits_type::assign(*(__get_short_pointer() + __pos), value_type());
__set_short_size(__pos);
}
__invalidate_iterators_past(__pos);
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::resize(size_type __n, value_type __c)
{
size_type __sz = size();
if (__n > __sz)
append(__n - __sz, __c);
else
__erase_to_end(__n);
}
template <class _CharT, class _Traits, class _Allocator>
inline void
basic_string<_CharT, _Traits, _Allocator>::__resize_default_init(size_type __n)
{
size_type __sz = size();
if (__n > __sz) {
__append_default_init(__n - __sz);
} else
__erase_to_end(__n);
}
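// max_size() starts from the allocator's limit and subtracts __alignment,
// presumably to leave headroom for __recommend()'s rounding and the null
// terminator. On big-endian targets the capacity word also holds the
// long-string flag bit, so the limit is halved whenever the allocator's
// maximum would overlap it.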
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT
{
size_type __m = __alloc_traits::max_size(__alloc());
#ifdef _LIBCPP_BIG_ENDIAN
return (__m <= ~__long_mask ? __m : __m/2) - __alignment;
#else
return __m - __alignment;
#endif
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacity)
{
if (__requested_capacity > max_size())
this->__throw_length_error();
#if _LIBCPP_STD_VER > 17
// Reserve never shrinks as of C++20.
if (__requested_capacity <= capacity()) return;
#endif
size_type __target_capacity = _VSTD::max(__requested_capacity, size());
__target_capacity = __recommend(__target_capacity);
if (__target_capacity == capacity()) return;
__shrink_or_extend(__target_capacity);
}
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT
{
size_type __target_capacity = __recommend(size());
if (__target_capacity == capacity()) return;
__shrink_or_extend(__target_capacity);
}
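// __shrink_or_extend() moves the contents into a buffer of exactly
// __target_capacity characters: back into the short buffer when the target is
// __min_cap - 1, otherwise into a freshly allocated long buffer. When
// shrinking, an allocation failure is swallowed and the string is simply left
// at its current capacity. Its callers here are reserve() and shrink_to_fit(),
// which only invoke it after deciding the capacity should change.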
template <class _CharT, class _Traits, class _Allocator>
void
basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target_capacity)
{
size_type __cap = capacity();
size_type __sz = size();
pointer __new_data, __p;
bool __was_long, __now_long;
if (__target_capacity == __min_cap - 1)
{
__was_long = true;
__now_long = false;
__new_data = __get_short_pointer();
__p = __get_long_pointer();
}
else
{
if (__target_capacity > __cap)
__new_data = __alloc_traits::allocate(__alloc(), __target_capacity+1);
else
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__new_data = __alloc_traits::allocate(__alloc(), __target_capacity+1);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
return;
}
#else // _LIBCPP_NO_EXCEPTIONS
if (__new_data == nullptr)
return;
#endif // _LIBCPP_NO_EXCEPTIONS
}
__now_long = true;
__was_long = __is_long();
__p = __get_pointer();
}
traits_type::copy(_VSTD::__to_address(__new_data),
_VSTD::__to_address(__p), size()+1);
if (__was_long)
__alloc_traits::deallocate(__alloc(), __p, __cap+1);
if (__now_long)
{
__set_long_cap(__target_capacity+1);
__set_long_size(__sz);
__set_long_pointer(__new_data);
}
else
__set_short_size(__sz);
__invalidate_all_iterators();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__pos <= size(), "string index out of bounds");
return *(data() + __pos);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) _NOEXCEPT
{
_LIBCPP_ASSERT(__pos <= size(), "string index out of bounds");
return *(__get_pointer() + __pos);
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::front() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::front(): string is empty");
return *__get_pointer();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::front() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::front(): string is empty");
return *data();
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::reference
basic_string<_CharT, _Traits, _Allocator>::back() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::back(): string is empty");
return *(__get_pointer() + size() - 1);
}
template <class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::const_reference
basic_string<_CharT, _Traits, _Allocator>::back() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "string::back(): string is empty");
return *(data() + size() - 1);
}
template <class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n, size_type __pos) const
{
size_type __sz = size();
if (__pos > __sz)
this->__throw_out_of_range();
size_type __rlen = _VSTD::min(__n, __sz - __pos);
traits_type::copy(__s, data() + __pos, __rlen);
return __rlen;
}
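// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// copy() member above writes at most __n characters starting at __pos and,
// unlike c_str()/data(), does not append a null terminator; the return value is
// the number of characters actually copied. copy_into_buffer is a hypothetical
// caller.
#include <cstddef>
#include <string>

inline std::size_t copy_into_buffer(const std::string& src, char (&buf)[16])
{
    std::size_t n = src.copy(buf, sizeof(buf) - 1);  // copies at most 15 chars from position 0
    buf[n] = '\0';                                   // copy() itself does not null-terminate
    return n;
}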
template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>
basic_string<_CharT, _Traits, _Allocator>::substr(size_type __pos, size_type __n) const
{
return basic_string(*this, __pos, __n, __alloc());
}
template <class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::swap(basic_string& __str)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
#if _LIBCPP_DEBUG_LEVEL == 2
if (!__is_long())
__get_db()->__invalidate_all(this);
if (!__str.__is_long())
__get_db()->__invalidate_all(&__str);
__get_db()->swap(this, &__str);
#endif
_LIBCPP_ASSERT(
__alloc_traits::propagate_on_container_swap::value ||
__alloc_traits::is_always_equal::value ||
__alloc() == __str.__alloc(), "swapping non-equal allocators");
_VSTD::swap(__r_.first(), __str.__r_.first());
_VSTD::__swap_allocator(__alloc(), __str.__alloc());
}
// find
template <class _Traits>
struct _LIBCPP_HIDDEN __traits_eq
{
typedef typename _Traits::char_type char_type;
_LIBCPP_INLINE_VISIBILITY
bool operator()(const char_type& __x, const char_type& __y) _NOEXCEPT
{return _Traits::eq(__x, __y);}
};
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find(): received nullptr");
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find(const _Tp &__t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find(): received nullptr");
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// rfind
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::rfind(): received nullptr");
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::rfind(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::rfind(): received nullptr");
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::rfind(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_rfind<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// find_first_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_first_of(): received nullptr");
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_first_of(): received nullptr");
return __str_find_first_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return find(__c, __pos);
}
// find_last_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_last_of(): received nullptr");
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_last_of(): received nullptr");
return __str_find_last_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return rfind(__c, __pos);
}
// find_first_not_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_first_not_of(): received nullptr");
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_first_not_of(): received nullptr");
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find_first_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
// find_last_not_of
template<class _CharT, class _Traits, class _Allocator>
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const value_type* __s,
size_type __pos,
size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::find_last_not_of(): received nullptr");
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, __n);
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const basic_string& __str,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __str.data(), __pos, __str.size());
}
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
>
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const _Tp& __t,
size_type __pos) const _NOEXCEPT
{
__self_view __sv = __t;
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __sv.data(), __pos, __sv.size());
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const value_type* __s,
size_type __pos) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::find_last_not_of(): received nullptr");
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __s, __pos, traits_type::length(__s));
}
template<class _CharT, class _Traits, class _Allocator>
inline
typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(value_type __c,
size_type __pos) const _NOEXCEPT
{
return __str_find_last_not_of<value_type, size_type, traits_type, npos>
(data(), size(), __c, __pos);
}
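// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the find family defined above. Every overload returns npos when nothing
// matches, and the *_of / *_not_of variants treat their argument as a set of
// characters rather than as a substring. The helper name is hypothetical.
#include <cassert>
#include <string>

inline void find_family_sketch()
{
    const std::string s = "key = value";
    assert(s.find("=") == 4);                        // substring search
    assert(s.rfind('e') == 10);                      // search from the end
    assert(s.find_first_of("=:") == 4);              // any character of the set
    assert(s.find_first_not_of("key ") == 4);        // first char outside the set
    assert(s.find("missing") == std::string::npos);  // no match -> npos
}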
// compare
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(const _Tp& __t) const _NOEXCEPT
{
__self_view __sv = __t;
size_t __lhs_sz = size();
size_t __rhs_sz = __sv.size();
int __result = traits_type::compare(data(), __sv.data(),
_VSTD::min(__lhs_sz, __rhs_sz));
if (__result != 0)
return __result;
if (__lhs_sz < __rhs_sz)
return -1;
if (__lhs_sz > __rhs_sz)
return 1;
return 0;
}
template <class _CharT, class _Traits, class _Allocator>
inline
int
basic_string<_CharT, _Traits, _Allocator>::compare(const basic_string& __str) const _NOEXCEPT
{
return compare(__self_view(__str));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const value_type* __s,
size_type __n2) const
{
_LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::compare(): received nullptr");
size_type __sz = size();
if (__pos1 > __sz || __n2 == npos)
this->__throw_out_of_range();
size_type __rlen = _VSTD::min(__n1, __sz - __pos1);
int __r = traits_type::compare(data() + __pos1, __s, _VSTD::min(__rlen, __n2));
if (__r == 0)
{
if (__rlen < __n2)
__r = -1;
else if (__rlen > __n2)
__r = 1;
}
return __r;
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const _Tp& __t) const
{
__self_view __sv = __t;
return compare(__pos1, __n1, __sv.data(), __sv.size());
}
template <class _CharT, class _Traits, class _Allocator>
inline
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const basic_string& __str) const
{
return compare(__pos1, __n1, __str.data(), __str.size());
}
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
_EnableIf
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
int
>
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const _Tp& __t,
size_type __pos2,
size_type __n2) const
{
__self_view __sv = __t;
return __self_view(*this).substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const basic_string& __str,
size_type __pos2,
size_type __n2) const
{
return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(const value_type* __s) const _NOEXCEPT
{
_LIBCPP_ASSERT(__s != nullptr, "string::compare(): received nullptr");
return compare(0, npos, __s, traits_type::length(__s));
}
template <class _CharT, class _Traits, class _Allocator>
int
basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
size_type __n1,
const value_type* __s) const
{
_LIBCPP_ASSERT(__s != nullptr, "string::compare(): received nullptr");
return compare(__pos1, __n1, __s, traits_type::length(__s));
}
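// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// compare() overloads above return a negative value, zero, or a positive value,
// ordering first by the compared character range and then by length. The helper
// name is hypothetical.
#include <cassert>
#include <string>

inline void compare_sketch()
{
    const std::string a = "apple", b = "apples";
    assert(a.compare(b) < 0);              // equal prefix, but a is shorter
    assert(b.compare(a) > 0);
    assert(a.compare(0, 3, "app") == 0);   // substring vs. C-string overload
}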
// __invariants
template<class _CharT, class _Traits, class _Allocator>
inline
bool
basic_string<_CharT, _Traits, _Allocator>::__invariants() const
{
if (size() > capacity())
return false;
if (capacity() < __min_cap - 1)
return false;
if (data() == nullptr)
return false;
if (data()[size()] != value_type())
return false;
return true;
}
// __clear_and_shrink
template<class _CharT, class _Traits, class _Allocator>
inline
void
basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT
{
clear();
if(__is_long())
{
__alloc_traits::deallocate(__alloc(), __get_long_pointer(), capacity() + 1);
__set_long_cap(0);
__set_short_size(0);
traits_type::assign(*__get_short_pointer(), value_type());
}
}
// operator==
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
size_t __lhs_sz = __lhs.size();
return __lhs_sz == __rhs.size() && _Traits::compare(__lhs.data(),
__rhs.data(),
__lhs_sz) == 0;
}
template<class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<char, char_traits<char>, _Allocator>& __lhs,
const basic_string<char, char_traits<char>, _Allocator>& __rhs) _NOEXCEPT
{
size_t __lhs_sz = __lhs.size();
if (__lhs_sz != __rhs.size())
return false;
const char* __lp = __lhs.data();
const char* __rp = __rhs.data();
if (__lhs.__is_long())
return char_traits<char>::compare(__lp, __rp, __lhs_sz) == 0;
for (; __lhs_sz != 0; --__lhs_sz, ++__lp, ++__rp)
if (*__lp != *__rp)
return false;
return true;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
typedef basic_string<_CharT, _Traits, _Allocator> _String;
_LIBCPP_ASSERT(__lhs != nullptr, "operator==(char*, basic_string): received nullptr");
size_t __lhs_len = _Traits::length(__lhs);
if (__lhs_len != __rhs.size()) return false;
return __rhs.compare(0, _String::npos, __lhs, __lhs_len) == 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const basic_string<_CharT,_Traits,_Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
typedef basic_string<_CharT, _Traits, _Allocator> _String;
_LIBCPP_ASSERT(__rhs != nullptr, "operator==(basic_string, char*): received nullptr");
size_t __rhs_len = _Traits::length(__rhs);
if (__rhs_len != __lhs.size()) return false;
return __lhs.compare(0, _String::npos, __rhs, __rhs_len) == 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const basic_string<_CharT,_Traits,_Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__lhs == __rhs);
}
// operator<
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __lhs.compare(__rhs) < 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return __lhs.compare(__rhs) < 0;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs.compare(__lhs) > 0;
}
// operator>
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return __rhs < __lhs;
}
// operator<=
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__rhs < __lhs);
}
// operator>=
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const _CharT* __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const _CharT* __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs) _NOEXCEPT
{
return !(__lhs < __rhs);
}
// operator +
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs,
const basic_string<_CharT, _Traits, _Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __lhs , const basic_string<_CharT,_Traits,_Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__rhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = _Traits::length(__lhs);
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(__lhs, __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __lhs, const basic_string<_CharT,_Traits,_Allocator>& __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__rhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = __rhs.size();
__r.__init(&__lhs, 1, 1 + __rhs_sz);
__r.append(__rhs.data(), __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, const _CharT* __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
typename basic_string<_CharT, _Traits, _Allocator>::size_type __rhs_sz = _Traits::length(__rhs);
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + __rhs_sz);
__r.append(__rhs, __rhs_sz);
return __r;
}
template<class _CharT, class _Traits, class _Allocator>
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, _CharT __rhs)
{
basic_string<_CharT, _Traits, _Allocator> __r(__lhs.get_allocator());
typename basic_string<_CharT, _Traits, _Allocator>::size_type __lhs_sz = __lhs.size();
__r.__init(__lhs.data(), __lhs_sz, __lhs_sz + 1);
__r.push_back(__rhs);
return __r;
}
#ifndef _LIBCPP_CXX03_LANG
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, const basic_string<_CharT, _Traits, _Allocator>& __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const basic_string<_CharT, _Traits, _Allocator>& __lhs, basic_string<_CharT, _Traits, _Allocator>&& __rhs)
{
return _VSTD::move(__rhs.insert(0, __lhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, basic_string<_CharT, _Traits, _Allocator>&& __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(const _CharT* __lhs , basic_string<_CharT,_Traits,_Allocator>&& __rhs)
{
return _VSTD::move(__rhs.insert(0, __lhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(_CharT __lhs, basic_string<_CharT,_Traits,_Allocator>&& __rhs)
{
__rhs.insert(__rhs.begin(), __lhs);
return _VSTD::move(__rhs);
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, const _CharT* __rhs)
{
return _VSTD::move(__lhs.append(__rhs));
}
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_string<_CharT, _Traits, _Allocator>
operator+(basic_string<_CharT, _Traits, _Allocator>&& __lhs, _CharT __rhs)
{
__lhs.push_back(__rhs);
return _VSTD::move(__lhs);
}
#endif // _LIBCPP_CXX03_LANG
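// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// rvalue operator+ overloads above append into (or insert into) the temporary
// operand and move it out, so chained concatenation reuses the intermediate
// buffer instead of copying it. make_greeting is a hypothetical helper.
#include <cassert>
#include <string>

inline std::string make_greeting(const std::string& name)
{
    return "Hello, " + name + "!";   // intermediate temporaries are moved, not copied
}

inline void concat_sketch()
{
    assert(make_greeting("world") == "Hello, world!");
}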
// swap
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(basic_string<_CharT, _Traits, _Allocator>& __lhs,
basic_string<_CharT, _Traits, _Allocator>& __rhs)
_NOEXCEPT_(_NOEXCEPT_(__lhs.swap(__rhs)))
{
__lhs.swap(__rhs);
}
_LIBCPP_FUNC_VIS int stoi (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long stol (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long stoul (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long long stoll (const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long long stoull(const string& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS float stof (const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS double stod (const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS long double stold(const string& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS string to_string(int __val);
_LIBCPP_FUNC_VIS string to_string(unsigned __val);
_LIBCPP_FUNC_VIS string to_string(long __val);
_LIBCPP_FUNC_VIS string to_string(unsigned long __val);
_LIBCPP_FUNC_VIS string to_string(long long __val);
_LIBCPP_FUNC_VIS string to_string(unsigned long long __val);
_LIBCPP_FUNC_VIS string to_string(float __val);
_LIBCPP_FUNC_VIS string to_string(double __val);
_LIBCPP_FUNC_VIS string to_string(long double __val);
_LIBCPP_FUNC_VIS int stoi (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long stol (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long stoul (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long long stoll (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long long stoull(const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS float stof (const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS double stod (const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS long double stold(const wstring& __str, size_t* __idx = nullptr);
_LIBCPP_FUNC_VIS wstring to_wstring(int __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(unsigned long long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(float __val);
_LIBCPP_FUNC_VIS wstring to_wstring(double __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long double __val);
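// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the sto* / to_string / to_wstring declarations above. The optional size_t*
// argument receives the index of the first unconverted character, the integer
// overloads take a numeric base, and on failure these functions throw
// std::invalid_argument or std::out_of_range. The helper name is hypothetical.
#include <cassert>
#include <cstddef>
#include <string>

inline void conversions_sketch()
{
    std::size_t idx = 0;
    assert(std::stoi("42 apples", &idx) == 42 && idx == 2);
    assert(std::stoi("ff", nullptr, 16) == 255);
    assert(std::to_string(42) == "42");
    // std::stoi("not a number") would throw std::invalid_argument.
}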
template<class _CharT, class _Traits, class _Allocator>
_LIBCPP_TEMPLATE_DATA_VIS
const typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::npos;
template <class _CharT, class _Allocator>
struct _LIBCPP_TEMPLATE_VIS
hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> >
: public unary_function<
basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t>
{
size_t
operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT
{ return __do_string_hash(__val.data(), __val.data() + __val.size()); }
};
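// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// hash specialization above is what lets basic_string instantiations over
// char_traits serve directly as keys of unordered containers. The helper name
// is hypothetical.
#include <cassert>
#include <string>
#include <unordered_map>

inline void hash_sketch()
{
    std::unordered_map<std::string, int> counts;
    ++counts["apple"];
    ++counts["apple"];
    assert(counts["apple"] == 2);
    assert(std::hash<std::string>()("apple") == std::hash<std::string>()("apple"));
}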
template<class _CharT, class _Traits, class _Allocator>
basic_ostream<_CharT, _Traits>&
operator<<(basic_ostream<_CharT, _Traits>& __os,
const basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
basic_istream<_CharT, _Traits>&
operator>>(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>&& __is,
basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm);
template<class _CharT, class _Traits, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>&& __is,
basic_string<_CharT, _Traits, _Allocator>& __str);
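// Editor's note: an illustrative sketch, not part of the libc++ sources, for
// the getline() overloads declared above: the two-argument form reads up to
// '\n', the three-argument form up to a caller-supplied delimiter, and neither
// stores the delimiter in the result. The helper name is hypothetical.
#include <cassert>
#include <sstream>
#include <string>

inline void getline_sketch()
{
    std::istringstream in("first line\nkey:value\n");
    std::string line, key;
    std::getline(in, line);           // "first line"
    std::getline(in, key, ':');       // "key"
    assert(line == "first line" && key == "key");
}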
#if _LIBCPP_STD_VER > 17
template <class _CharT, class _Traits, class _Allocator, class _Up>
inline _LIBCPP_INLINE_VISIBILITY
typename basic_string<_CharT, _Traits, _Allocator>::size_type
erase(basic_string<_CharT, _Traits, _Allocator>& __str, const _Up& __v) {
auto __old_size = __str.size();
__str.erase(_VSTD::remove(__str.begin(), __str.end(), __v), __str.end());
return __old_size - __str.size();
}
template <class _CharT, class _Traits, class _Allocator, class _Predicate>
inline _LIBCPP_INLINE_VISIBILITY
typename basic_string<_CharT, _Traits, _Allocator>::size_type
erase_if(basic_string<_CharT, _Traits, _Allocator>& __str,
_Predicate __pred) {
auto __old_size = __str.size();
__str.erase(_VSTD::remove_if(__str.begin(), __str.end(), __pred),
__str.end());
return __old_size - __str.size();
}
#endif
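// Editor's note: an illustrative sketch, not part of the libc++ sources, of the
// C++20 uniform erasure helpers defined just above; both return the number of
// characters removed. The helper name is hypothetical.
#if __cplusplus > 201703L
#include <cassert>
#include <cctype>
#include <string>

inline void erase_sketch()
{
    std::string s = "a1b2c3";
    assert(std::erase(s, 'b') == 1);                                            // s == "a12c3"
    assert(std::erase_if(s, [](unsigned char c) { return std::isdigit(c) != 0; }) == 3);
    assert(s == "ac");
}
#endif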
#if _LIBCPP_DEBUG_LEVEL == 2
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__dereferenceable(const const_iterator* __i) const
{
return this->data() <= _VSTD::__to_address(__i->base()) &&
_VSTD::__to_address(__i->base()) < this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__decrementable(const const_iterator* __i) const
{
return this->data() < _VSTD::__to_address(__i->base()) &&
_VSTD::__to_address(__i->base()) <= this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
{
const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
return this->data() <= __p && __p <= this->data() + this->size();
}
template<class _CharT, class _Traits, class _Allocator>
bool
basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
{
const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
return this->data() <= __p && __p < this->data() + this->size();
}
#endif // _LIBCPP_DEBUG_LEVEL == 2
#if _LIBCPP_STD_VER > 11
// Literal suffixes for basic_string [basic.string.literals]
inline namespace literals
{
inline namespace string_literals
{
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char> operator "" s( const char *__str, size_t __len )
{
return basic_string<char> (__str, __len);
}
inline _LIBCPP_INLINE_VISIBILITY
basic_string<wchar_t> operator "" s( const wchar_t *__str, size_t __len )
{
return basic_string<wchar_t> (__str, __len);
}
#ifndef _LIBCPP_HAS_NO_CHAR8_T
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char8_t> operator "" s(const char8_t *__str, size_t __len) _NOEXCEPT
{
return basic_string<char8_t> (__str, __len);
}
#endif
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char16_t> operator "" s( const char16_t *__str, size_t __len )
{
return basic_string<char16_t> (__str, __len);
}
inline _LIBCPP_INLINE_VISIBILITY
basic_string<char32_t> operator "" s( const char32_t *__str, size_t __len )
{
return basic_string<char32_t> (__str, __len);
}
}
}
#endif
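// Editor's note: an illustrative sketch, not part of the libc++ sources. The
// "s" literal suffixes defined above construct a basic_string from the literal
// and its length, so embedded null characters are preserved, unlike
// construction from a const char*. The helper name is hypothetical.
#if __cplusplus >= 201402L
#include <cassert>
#include <string>

inline void literal_sketch()
{
    using namespace std::string_literals;
    auto a = "abc"s;                              // std::string of size 3
    auto b = "ab\0cd"s;                           // size 5: the embedded '\0' is preserved
    assert(a.size() == 3 && b.size() == 5);
    assert(std::string("ab\0cd").size() == 2);    // the const char* constructor stops at '\0'
}
#endif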
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP_STRING
diff --git a/contrib/llvm-project/libcxx/include/vector b/contrib/llvm-project/libcxx/include/vector
index 9189ed44a80c..90d8b946f135 100644
--- a/contrib/llvm-project/libcxx/include/vector
+++ b/contrib/llvm-project/libcxx/include/vector
@@ -1,3416 +1,3436 @@
// -*- C++ -*-
//===------------------------------ vector --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_VECTOR
#define _LIBCPP_VECTOR
/*
vector synopsis
namespace std
{
template <class T, class Allocator = allocator<T> >
class vector
{
public:
typedef T value_type;
typedef Allocator allocator_type;
typedef typename allocator_type::reference reference;
typedef typename allocator_type::const_reference const_reference;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef typename allocator_type::pointer pointer;
typedef typename allocator_type::const_pointer const_pointer;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
vector()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit vector(const allocator_type&);
explicit vector(size_type n);
explicit vector(size_type n, const allocator_type&); // C++14
vector(size_type n, const value_type& value, const allocator_type& = allocator_type());
template <class InputIterator>
vector(InputIterator first, InputIterator last, const allocator_type& = allocator_type());
vector(const vector& x);
vector(vector&& x)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
vector(initializer_list<value_type> il);
vector(initializer_list<value_type> il, const allocator_type& a);
~vector();
vector& operator=(const vector& x);
vector& operator=(vector&& x)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value); // C++17
vector& operator=(initializer_list<value_type> il);
template <class InputIterator>
void assign(InputIterator first, InputIterator last);
void assign(size_type n, const value_type& u);
void assign(initializer_list<value_type> il);
allocator_type get_allocator() const noexcept;
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
bool empty() const noexcept;
void reserve(size_type n);
void shrink_to_fit() noexcept;
reference operator[](size_type n);
const_reference operator[](size_type n) const;
reference at(size_type n);
const_reference at(size_type n) const;
reference front();
const_reference front() const;
reference back();
const_reference back() const;
value_type* data() noexcept;
const value_type* data() const noexcept;
void push_back(const value_type& x);
void push_back(value_type&& x);
template <class... Args>
reference emplace_back(Args&&... args); // reference in C++17
void pop_back();
template <class... Args> iterator emplace(const_iterator position, Args&&... args);
iterator insert(const_iterator position, const value_type& x);
iterator insert(const_iterator position, value_type&& x);
iterator insert(const_iterator position, size_type n, const value_type& x);
template <class InputIterator>
iterator insert(const_iterator position, InputIterator first, InputIterator last);
iterator insert(const_iterator position, initializer_list<value_type> il);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
void clear() noexcept;
void resize(size_type sz);
void resize(size_type sz, const value_type& c);
void swap(vector&)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
bool __invariants() const;
};
template <class Allocator = allocator<T> >
class vector<bool, Allocator>
{
public:
typedef bool value_type;
typedef Allocator allocator_type;
typedef implementation-defined iterator;
typedef implementation-defined const_iterator;
typedef typename allocator_type::size_type size_type;
typedef typename allocator_type::difference_type difference_type;
typedef iterator pointer;
typedef const_iterator const_pointer;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
class reference
{
public:
reference(const reference&) noexcept;
operator bool() const noexcept;
reference& operator=(bool x) noexcept;
reference& operator=(const reference& x) noexcept;
iterator operator&() const noexcept;
void flip() noexcept;
};
class const_reference
{
public:
const_reference(const reference&) noexcept;
operator bool() const noexcept;
const_iterator operator&() const noexcept;
};
vector()
noexcept(is_nothrow_default_constructible<allocator_type>::value);
explicit vector(const allocator_type&);
explicit vector(size_type n, const allocator_type& a = allocator_type()); // C++14
vector(size_type n, const value_type& value, const allocator_type& = allocator_type());
template <class InputIterator>
vector(InputIterator first, InputIterator last, const allocator_type& = allocator_type());
vector(const vector& x);
vector(vector&& x)
noexcept(is_nothrow_move_constructible<allocator_type>::value);
vector(initializer_list<value_type> il);
vector(initializer_list<value_type> il, const allocator_type& a);
~vector();
vector& operator=(const vector& x);
vector& operator=(vector&& x)
noexcept(
allocator_type::propagate_on_container_move_assignment::value ||
allocator_type::is_always_equal::value); // C++17
vector& operator=(initializer_list<value_type> il);
template <class InputIterator>
void assign(InputIterator first, InputIterator last);
void assign(size_type n, const value_type& u);
void assign(initializer_list<value_type> il);
allocator_type get_allocator() const noexcept;
iterator begin() noexcept;
const_iterator begin() const noexcept;
iterator end() noexcept;
const_iterator end() const noexcept;
reverse_iterator rbegin() noexcept;
const_reverse_iterator rbegin() const noexcept;
reverse_iterator rend() noexcept;
const_reverse_iterator rend() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
const_reverse_iterator crbegin() const noexcept;
const_reverse_iterator crend() const noexcept;
size_type size() const noexcept;
size_type max_size() const noexcept;
size_type capacity() const noexcept;
bool empty() const noexcept;
void reserve(size_type n);
void shrink_to_fit() noexcept;
reference operator[](size_type n);
const_reference operator[](size_type n) const;
reference at(size_type n);
const_reference at(size_type n) const;
reference front();
const_reference front() const;
reference back();
const_reference back() const;
void push_back(const value_type& x);
template <class... Args> reference emplace_back(Args&&... args); // C++14; reference in C++17
void pop_back();
template <class... Args> iterator emplace(const_iterator position, Args&&... args); // C++14
iterator insert(const_iterator position, const value_type& x);
iterator insert(const_iterator position, size_type n, const value_type& x);
template <class InputIterator>
iterator insert(const_iterator position, InputIterator first, InputIterator last);
iterator insert(const_iterator position, initializer_list<value_type> il);
iterator erase(const_iterator position);
iterator erase(const_iterator first, const_iterator last);
void clear() noexcept;
void resize(size_type sz);
void resize(size_type sz, value_type x);
void swap(vector&)
noexcept(allocator_traits<allocator_type>::propagate_on_container_swap::value ||
allocator_traits<allocator_type>::is_always_equal::value); // C++17
void flip() noexcept;
bool __invariants() const;
};
template <class InputIterator, class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
vector(InputIterator, InputIterator, Allocator = Allocator())
-> vector<typename iterator_traits<InputIterator>::value_type, Allocator>;
template <class Allocator> struct hash<std::vector<bool, Allocator>>;
template <class T, class Allocator> bool operator==(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator< (const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator!=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator> (const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator>=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator> bool operator<=(const vector<T,Allocator>& x, const vector<T,Allocator>& y);
template <class T, class Allocator>
void swap(vector<T,Allocator>& x, vector<T,Allocator>& y)
noexcept(noexcept(x.swap(y)));
template <class T, class Allocator, class U>
typename vector<T, Allocator>::size_type
erase(vector<T, Allocator>& c, const U& value); // C++20
template <class T, class Allocator, class Predicate>
typename vector<T, Allocator>::size_type
erase_if(vector<T, Allocator>& c, Predicate pred); // C++20
} // std
*/
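// Editor's note: an illustrative sketch, not part of the libc++ sources, of the
// interface summarized in the synopsis above, including the C++20 erase helper
// declared at its end. The helper name is hypothetical and would live in a
// separate test translation unit.
#include <cassert>
#include <vector>

inline void vector_synopsis_sketch()
{
    std::vector<int> v;
    v.reserve(4);                    // capacity() >= 4, size() still 0
    v.push_back(1);
    v.emplace_back(2);               // constructs in place; returns a reference since C++17
    v.insert(v.begin(), 0);          // v == {0, 1, 2}
    assert(v.size() == 3 && v.front() == 0 && v.back() == 2);
    v.erase(v.begin());              // v == {1, 2}
#if __cplusplus > 201703L
    assert(std::erase(v, 2) == 1);   // C++20 uniform erasure declared in the synopsis
#endif
}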
#include <__config>
#include <__bit_reference>
#include <__debug>
#include <__functional_base>
#include <__iterator/wrap_iter.h>
#include <__split_buffer>
#include <__utility/forward.h>
#include <algorithm>
#include <climits>
#include <compare>
+#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <iosfwd> // for forward declaration of vector
#include <limits>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
_LIBCPP_BEGIN_NAMESPACE_STD
template <bool>
class _LIBCPP_TEMPLATE_VIS __vector_base_common
{
protected:
_LIBCPP_INLINE_VISIBILITY __vector_base_common() {}
_LIBCPP_NORETURN void __throw_length_error() const;
_LIBCPP_NORETURN void __throw_out_of_range() const;
};
template <bool __b>
void
__vector_base_common<__b>::__throw_length_error() const
{
_VSTD::__throw_length_error("vector");
}
template <bool __b>
void
__vector_base_common<__b>::__throw_out_of_range() const
{
_VSTD::__throw_out_of_range("vector");
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __vector_base_common<true>)
template <class _Tp, class _Allocator>
class __vector_base
: protected __vector_base_common<true>
{
public:
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
protected:
typedef _Tp value_type;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef typename __alloc_traits::difference_type difference_type;
typedef typename __alloc_traits::pointer pointer;
typedef typename __alloc_traits::const_pointer const_pointer;
typedef pointer iterator;
typedef const_pointer const_iterator;
pointer __begin_;
pointer __end_;
__compressed_pair<pointer, allocator_type> __end_cap_;
_LIBCPP_INLINE_VISIBILITY
allocator_type& __alloc() _NOEXCEPT
{return __end_cap_.second();}
_LIBCPP_INLINE_VISIBILITY
const allocator_type& __alloc() const _NOEXCEPT
{return __end_cap_.second();}
_LIBCPP_INLINE_VISIBILITY
pointer& __end_cap() _NOEXCEPT
{return __end_cap_.first();}
_LIBCPP_INLINE_VISIBILITY
const pointer& __end_cap() const _NOEXCEPT
{return __end_cap_.first();}
_LIBCPP_INLINE_VISIBILITY
__vector_base()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT;
#endif
~__vector_base();
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT {__destruct_at_end(__begin_);}
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return static_cast<size_type>(__end_cap() - __begin_);}
_LIBCPP_INLINE_VISIBILITY
void __destruct_at_end(pointer __new_last) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base& __c)
{__copy_assign_alloc(__c, integral_constant<bool,
__alloc_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base& __c)
_NOEXCEPT_(
!__alloc_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__c, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());}
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
private:
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base& __c, true_type)
{
if (__alloc() != __c.__alloc())
{
clear();
__alloc_traits::deallocate(__alloc(), __begin_, capacity());
__begin_ = __end_ = __end_cap() = nullptr;
}
__alloc() = __c.__alloc();
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const __vector_base&, false_type)
{}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(__vector_base&, false_type)
_NOEXCEPT
{}
};
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
__vector_base<_Tp, _Allocator>::__destruct_at_end(pointer __new_last) _NOEXCEPT
{
pointer __soon_to_be_end = __end_;
while (__new_last != __soon_to_be_end)
__alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end));
__end_ = __new_last;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, __default_init_tag())
{
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base(const allocator_type& __a)
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, __a)
{
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
__vector_base<_Tp, _Allocator>::__vector_base(allocator_type&& __a) _NOEXCEPT
: __begin_(nullptr),
__end_(nullptr),
__end_cap_(nullptr, _VSTD::move(__a)) {}
#endif
template <class _Tp, class _Allocator>
__vector_base<_Tp, _Allocator>::~__vector_base()
{
if (__begin_ != nullptr)
{
clear();
__alloc_traits::deallocate(__alloc(), __begin_, capacity());
}
}
template <class _Tp, class _Allocator /* = allocator<_Tp> */>
class _LIBCPP_TEMPLATE_VIS vector
: private __vector_base<_Tp, _Allocator>
{
private:
typedef __vector_base<_Tp, _Allocator> __base;
typedef allocator<_Tp> __default_allocator_type;
public:
typedef vector __self;
typedef _Tp value_type;
typedef _Allocator allocator_type;
typedef typename __base::__alloc_traits __alloc_traits;
typedef typename __base::reference reference;
typedef typename __base::const_reference const_reference;
typedef typename __base::size_type size_type;
typedef typename __base::difference_type difference_type;
typedef typename __base::pointer pointer;
typedef typename __base::const_pointer const_pointer;
typedef __wrap_iter<pointer> iterator;
typedef __wrap_iter<const_pointer> const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
_LIBCPP_INLINE_VISIBILITY
vector() _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
_LIBCPP_INLINE_VISIBILITY explicit vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
}
explicit vector(size_type __n);
#if _LIBCPP_STD_VER > 11
explicit vector(size_type __n, const allocator_type& __a);
#endif
vector(size_type __n, const value_type& __x);
vector(size_type __n, const value_type& __x, const allocator_type& __a);
template <class _InputIterator>
vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
_InputIterator>::type __last);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
_ForwardIterator>::type __last);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value>::type* = 0);
_LIBCPP_INLINE_VISIBILITY
~vector()
{
__annotate_delete();
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__erase_c(this);
#endif
}
vector(const vector& __x);
vector(const vector& __x, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(const vector& __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
vector(initializer_list<value_type> __il);
_LIBCPP_INLINE_VISIBILITY
vector(initializer_list<value_type> __il, const allocator_type& __a);
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __x)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#endif
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __x, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(vector&& __x)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
vector& operator=(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end()); return *this;}
#endif // !_LIBCPP_CXX03_LANG
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
void
>::type
assign(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
void
>::type
assign(_ForwardIterator __first, _ForwardIterator __last);
void assign(size_type __n, const_reference __u);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void assign(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY
allocator_type get_allocator() const _NOEXCEPT
{return this->__alloc();}
_LIBCPP_INLINE_VISIBILITY iterator begin() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_iterator begin() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY iterator end() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_iterator end() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return begin();}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return end();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY
size_type size() const _NOEXCEPT
{return static_cast<size_type>(this->__end_ - this->__begin_);}
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return __base::capacity();}
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT
{return this->__begin_ == this->__end_;}
size_type max_size() const _NOEXCEPT;
void reserve(size_type __n);
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __n) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __n) const _NOEXCEPT;
reference at(size_type __n);
const_reference at(size_type __n) const;
_LIBCPP_INLINE_VISIBILITY reference front() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "front() called on an empty vector");
return *this->__begin_;
}
_LIBCPP_INLINE_VISIBILITY const_reference front() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "front() called on an empty vector");
return *this->__begin_;
}
_LIBCPP_INLINE_VISIBILITY reference back() _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "back() called on an empty vector");
return *(this->__end_ - 1);
}
_LIBCPP_INLINE_VISIBILITY const_reference back() const _NOEXCEPT
{
_LIBCPP_ASSERT(!empty(), "back() called on an empty vector");
return *(this->__end_ - 1);
}
_LIBCPP_INLINE_VISIBILITY
value_type* data() _NOEXCEPT
{return _VSTD::__to_address(this->__begin_);}
_LIBCPP_INLINE_VISIBILITY
const value_type* data() const _NOEXCEPT
{return _VSTD::__to_address(this->__begin_);}
#ifdef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void __emplace_back(const value_type& __x) { push_back(__x); }
#else
template <class _Arg>
_LIBCPP_INLINE_VISIBILITY
void __emplace_back(_Arg&& __arg) {
emplace_back(_VSTD::forward<_Arg>(__arg));
}
#endif
_LIBCPP_INLINE_VISIBILITY void push_back(const_reference __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY void push_back(value_type&& __x);
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY
#if _LIBCPP_STD_VER > 14
reference emplace_back(_Args&&... __args);
#else
void emplace_back(_Args&&... __args);
#endif
#endif // !_LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void pop_back();
iterator insert(const_iterator __position, const_reference __x);
#ifndef _LIBCPP_CXX03_LANG
iterator insert(const_iterator __position, value_type&& __x);
template <class... _Args>
iterator emplace(const_iterator __position, _Args&&... __args);
#endif // !_LIBCPP_CXX03_LANG
iterator insert(const_iterator __position, size_type __n, const_reference __x);
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
iterator
>::type
insert(const_iterator __position, _InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
iterator
>::type
insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __position, initializer_list<value_type> __il)
{return insert(__position, __il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __position);
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT
{
size_type __old_size = size();
__base::clear();
__annotate_shrink(__old_size);
__invalidate_all_iterators();
}
void resize(size_type __sz);
void resize(size_type __sz, const_reference __x);
void swap(vector&)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
bool __invariants() const;
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const;
bool __decrementable(const const_iterator* __i) const;
bool __addable(const const_iterator* __i, ptrdiff_t __n) const;
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const;
#endif // _LIBCPP_DEBUG_LEVEL == 2
private:
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
_LIBCPP_INLINE_VISIBILITY void __invalidate_iterators_past(pointer __new_last);
void __vallocate(size_type __n);
void __vdeallocate() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const;
void __construct_at_end(size_type __n);
_LIBCPP_INLINE_VISIBILITY
void __construct_at_end(size_type __n, const_reference __x);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n);
void __append(size_type __n);
void __append(size_type __n, const_reference __x);
_LIBCPP_INLINE_VISIBILITY
iterator __make_iter(pointer __p) _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
const_iterator __make_iter(const_pointer __p) const _NOEXCEPT;
void __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v);
pointer __swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p);
void __move_range(pointer __from_s, pointer __from_e, pointer __to);
void __move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
void __move_assign(vector& __c, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value);
_LIBCPP_INLINE_VISIBILITY
void __destruct_at_end(pointer __new_last) _NOEXCEPT
{
__invalidate_iterators_past(__new_last);
size_type __old_size = size();
__base::__destruct_at_end(__new_last);
__annotate_shrink(__old_size);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Up>
_LIBCPP_INLINE_VISIBILITY
inline void __push_back_slow_path(_Up&& __x);
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY
inline void __emplace_back_slow_path(_Args&&... __args);
#else
template <class _Up>
_LIBCPP_INLINE_VISIBILITY
inline void __push_back_slow_path(_Up& __x);
#endif
// The following functions are no-ops outside of AddressSanitizer mode.
// We call annotations only for the default Allocator because other allocators
// may not meet the AddressSanitizer alignment constraints.
// See the documentation for __sanitizer_annotate_contiguous_container for more details.
#ifndef _LIBCPP_HAS_NO_ASAN
void __annotate_contiguous_container(const void *__beg, const void *__end,
const void *__old_mid,
const void *__new_mid) const
{
if (__beg && is_same<allocator_type, __default_allocator_type>::value)
__sanitizer_annotate_contiguous_container(__beg, __end, __old_mid, __new_mid);
}
#else
_LIBCPP_INLINE_VISIBILITY
void __annotate_contiguous_container(const void*, const void*, const void*,
const void*) const _NOEXCEPT {}
#endif
_LIBCPP_INLINE_VISIBILITY
void __annotate_new(size_type __current_size) const _NOEXCEPT {
__annotate_contiguous_container(data(), data() + capacity(),
data() + capacity(), data() + __current_size);
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_delete() const _NOEXCEPT {
__annotate_contiguous_container(data(), data() + capacity(),
data() + size(), data() + capacity());
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_increase(size_type __n) const _NOEXCEPT
{
__annotate_contiguous_container(data(), data() + capacity(),
data() + size(), data() + size() + __n);
}
_LIBCPP_INLINE_VISIBILITY
void __annotate_shrink(size_type __old_size) const _NOEXCEPT
{
__annotate_contiguous_container(data(), data() + capacity(),
data() + __old_size, data() + size());
}
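// A worked example of the annotation protocol (hypothetical sizes: assume
// capacity() == 8 and size() == 3, and that __n == 2 elements are about to
// be constructed):
//
//   __annotate_increase(2) forwards
//     __beg     = data()           // start of the allocation
//     __end     = data() + 8       // one past the allocation
//     __old_mid = data() + 3       // old boundary of live elements
//     __new_mid = data() + 5       // new boundary of live elements
//   to __sanitizer_annotate_contiguous_container, telling ASan that
//   [data() + 3, data() + 5) is now valid container storage; __annotate_shrink
//   moves the boundary in the other direction.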
struct _ConstructTransaction {
explicit _ConstructTransaction(vector &__v, size_type __n)
: __v_(__v), __pos_(__v.__end_), __new_end_(__v.__end_ + __n) {
#ifndef _LIBCPP_HAS_NO_ASAN
__v_.__annotate_increase(__n);
#endif
}
~_ConstructTransaction() {
__v_.__end_ = __pos_;
#ifndef _LIBCPP_HAS_NO_ASAN
if (__pos_ != __new_end_) {
__v_.__annotate_shrink(__new_end_ - __v_.__begin_);
}
#endif
}
vector &__v_;
pointer __pos_;
const_pointer const __new_end_;
private:
_ConstructTransaction(_ConstructTransaction const&) = delete;
_ConstructTransaction& operator=(_ConstructTransaction const&) = delete;
};
template <class ..._Args>
_LIBCPP_INLINE_VISIBILITY
void __construct_one_at_end(_Args&& ...__args) {
_ConstructTransaction __tx(*this, 1);
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__tx.__pos_),
_VSTD::forward<_Args>(__args)...);
++__tx.__pos_;
}
};
#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
class = _EnableIf<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
class = _EnableIf<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator, _Alloc)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
#endif
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v)
{
__annotate_delete();
_VSTD::__construct_backward_with_exception_guarantees(this->__alloc(), this->__begin_, this->__end_, __v.__begin_);
_VSTD::swap(this->__begin_, __v.__begin_);
_VSTD::swap(this->__end_, __v.__end_);
_VSTD::swap(this->__end_cap(), __v.__end_cap());
__v.__first_ = __v.__begin_;
__annotate_new(size());
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p)
{
__annotate_delete();
pointer __r = __v.__begin_;
_VSTD::__construct_backward_with_exception_guarantees(this->__alloc(), this->__begin_, __p, __v.__begin_);
_VSTD::__construct_forward_with_exception_guarantees(this->__alloc(), __p, this->__end_, __v.__end_);
_VSTD::swap(this->__begin_, __v.__begin_);
_VSTD::swap(this->__end_, __v.__end_);
_VSTD::swap(this->__end_cap(), __v.__end_cap());
__v.__first_ = __v.__begin_;
__annotate_new(size());
__invalidate_all_iterators();
return __r;
}
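// The two overloads above implement the shared "grow by swapping buffers"
// step used by reserve(), the push_back/emplace_back slow paths and insert():
// any new elements have already been constructed inside the __split_buffer
// __v, the existing elements are move/copy-constructed around them (backward
// in front of __v.__begin_, and additionally forward after __v.__end_ in the
// two-argument form, which splits the old contents at __p), and then the
// three pointers __begin_/__end_/__end_cap() are swapped. A rough sketch of
// the same idea in plain terms (illustrative only; the real code goes through
// the allocator and keeps the strong exception guarantee):
//
//   // new_buf = allocate(new_cap);
//   // place the new element(s) at their final offset in new_buf;
//   // relocate the old elements into the remaining slots;
//   // swap ownership of the buffers;        // *this now owns new_buf
//   // destroy old elements, free old_buf;   // done by __v's destructor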
// Allocate space for __n objects
// throws length_error if __n > max_size()
// throws (probably bad_alloc) if memory runs out
// Precondition: __begin_ == __end_ == __end_cap() == 0
// Precondition: __n > 0
// Postcondition: capacity() == __n
// Postcondition: size() == 0
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__vallocate(size_type __n)
{
if (__n > max_size())
this->__throw_length_error();
this->__begin_ = this->__end_ = __alloc_traits::allocate(this->__alloc(), __n);
this->__end_cap() = this->__begin_ + __n;
__annotate_new(0);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__vdeallocate() _NOEXCEPT
{
if (this->__begin_ != nullptr)
{
clear();
__alloc_traits::deallocate(this->__alloc(), this->__begin_, capacity());
this->__begin_ = this->__end_ = this->__end_cap() = nullptr;
}
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::size_type
vector<_Tp, _Allocator>::max_size() const _NOEXCEPT
{
return _VSTD::min<size_type>(__alloc_traits::max_size(this->__alloc()),
numeric_limits<difference_type>::max());
}
// Precondition: __new_size > capacity()
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::size_type
vector<_Tp, _Allocator>::__recommend(size_type __new_size) const
{
const size_type __ms = max_size();
if (__new_size > __ms)
this->__throw_length_error();
const size_type __cap = capacity();
if (__cap >= __ms / 2)
return __ms;
return _VSTD::max<size_type>(2 * __cap, __new_size);
}
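// A worked example of the growth policy above (hypothetical sizes, assuming
// max_size() is far larger than the values shown):
//
//   capacity() == 0, __new_size == 1   ->  max(0, 1)   == 1
//   capacity() == 4, __new_size == 5   ->  max(8, 5)   == 8
//   capacity() == 8, __new_size == 25  ->  max(16, 25) == 25
//
// i.e. the capacity at least doubles on each reallocation, and once it
// reaches half of max_size() the recommendation saturates at max_size().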
// Default constructs __n objects starting at __end_
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == old size() + __n
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos));
}
}
// Copy constructs __n objects starting at __end_ from __x
// throws if construction throws
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == old size() + __n
// Postcondition: [i] == __x for all i in [size() - __n, size())
template <class _Tp, class _Allocator>
inline
void
vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos), __x);
}
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last, size_type __n)
{
_ConstructTransaction __tx(*this, __n);
_VSTD::__construct_range_forward(this->__alloc(), __first, __last, __tx.__pos_);
}
// Default constructs __n objects starting at __end_
// throws if construction throws
// Postcondition: size() == old size() + __n
// Exception safety: strong.
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__append(size_type __n)
{
if (static_cast<size_type>(this->__end_cap() - this->__end_) >= __n)
this->__construct_at_end(__n);
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), __a);
__v.__construct_at_end(__n);
__swap_out_circular_buffer(__v);
}
}
// Copy constructs __n objects starting at __end_ from __x
// throws if construction throws
// Postcondition: size() == old size() + __n
// Exception safety: strong.
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__append(size_type __n, const_reference __x)
{
if (static_cast<size_type>(this->__end_cap() - this->__end_) >= __n)
this->__construct_at_end(__n, __x);
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), size(), __a);
__v.__construct_at_end(__n, __x);
__swap_out_circular_buffer(__v);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n);
}
}
#if _LIBCPP_STD_VER > 11
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n);
}
}
#endif
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
vector<_Tp, _Allocator>::vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value,
_InputIterator>::type __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
vector<_Tp, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_InputIterator>::reference>::value>::type*)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
vector<_Tp, _Allocator>::vector(_ForwardIterator __first,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value,
_ForwardIterator>::type __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last, __n);
}
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
vector<_Tp, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
value_type,
typename iterator_traits<_ForwardIterator>::reference>::value>::type*)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last, __n);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(const vector& __x)
: __base(__alloc_traits::select_on_container_copy_construction(__x.__alloc()))
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = __x.size();
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__x.__begin_, __x.__end_, __n);
}
}
template <class _Tp, class _Allocator>
vector<_Tp, _Allocator>::vector(const vector& __x, const __identity_t<allocator_type>& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
size_type __n = __x.size();
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__x.__begin_, __x.__end_, __n);
}
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(vector&& __x)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#endif
: __base(_VSTD::move(__x.__alloc()))
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
__get_db()->swap(this, &__x);
#endif
this->__begin_ = __x.__begin_;
this->__end_ = __x.__end_;
this->__end_cap() = __x.__end_cap();
__x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(vector&& __x, const __identity_t<allocator_type>& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__a == __x.__alloc())
{
this->__begin_ = __x.__begin_;
this->__end_ = __x.__end_;
this->__end_cap() = __x.__end_cap();
__x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__x);
#endif
}
else
{
typedef move_iterator<iterator> _Ip;
assign(_Ip(__x.begin()), _Ip(__x.end()));
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__il.size() > 0)
{
__vallocate(__il.size());
__construct_at_end(__il.begin(), __il.end(), __il.size());
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>::vector(initializer_list<value_type> __il, const allocator_type& __a)
: __base(__a)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
#endif
if (__il.size() > 0)
{
__vallocate(__il.size());
__construct_at_end(__il.begin(), __il.end(), __il.size());
}
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>&
vector<_Tp, _Allocator>::operator=(vector&& __x)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__x, integral_constant<bool,
__alloc_traits::propagate_on_container_move_assignment::value>());
return *this;
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_assign(vector& __c, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value)
{
if (__base::__alloc() != __c.__alloc())
{
typedef move_iterator<iterator> _Ip;
assign(_Ip(__c.begin()), _Ip(__c.end()));
}
else
__move_assign(__c, true_type());
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__vdeallocate();
__base::__move_assign_alloc(__c); // this can throw
this->__begin_ = __c.__begin_;
this->__end_ = __c.__end_;
this->__end_cap() = __c.__end_cap();
__c.__begin_ = __c.__end_ = __c.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__c);
#endif
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>&
vector<_Tp, _Allocator>::operator=(const vector& __x)
{
if (this != &__x)
{
__base::__copy_assign_alloc(__x);
assign(__x.__begin_, __x.__end_);
}
return *this;
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_InputIterator>::reference>::value,
void
>::type
vector<_Tp, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
clear();
for (; __first != __last; ++__first)
__emplace_back(*__first);
}
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_ForwardIterator>::reference>::value,
void
>::type
vector<_Tp, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __new_size = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__new_size <= capacity())
{
_ForwardIterator __mid = __last;
bool __growing = false;
if (__new_size > size())
{
__growing = true;
__mid = __first;
_VSTD::advance(__mid, size());
}
pointer __m = _VSTD::copy(__first, __mid, this->__begin_);
if (__growing)
__construct_at_end(__mid, __last, __new_size - size());
else
this->__destruct_at_end(__m);
}
else
{
__vdeallocate();
__vallocate(__recommend(__new_size));
__construct_at_end(__first, __last, __new_size);
}
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::assign(size_type __n, const_reference __u)
{
if (__n <= capacity())
{
size_type __s = size();
_VSTD::fill_n(this->__begin_, _VSTD::min(__n, __s), __u);
if (__n > __s)
__construct_at_end(__n - __s, __u);
else
this->__destruct_at_end(this->__begin_ + __n);
}
else
{
__vdeallocate();
__vallocate(__recommend(static_cast<size_type>(__n)));
__construct_at_end(__n, __u);
}
__invalidate_all_iterators();
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::__make_iter(pointer __p) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
return iterator(this, __p);
#else
return iterator(__p);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::__make_iter(const_pointer __p) const _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
return const_iterator(this, __p);
#else
return const_iterator(__p);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::begin() _NOEXCEPT
{
return __make_iter(this->__begin_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::begin() const _NOEXCEPT
{
return __make_iter(this->__begin_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::end() _NOEXCEPT
{
return __make_iter(this->__end_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_iterator
vector<_Tp, _Allocator>::end() const _NOEXCEPT
{
return __make_iter(this->__end_);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::reference
vector<_Tp, _Allocator>::operator[](size_type __n) _NOEXCEPT
{
_LIBCPP_ASSERT(__n < size(), "vector[] index out of bounds");
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::const_reference
vector<_Tp, _Allocator>::operator[](size_type __n) const _NOEXCEPT
{
_LIBCPP_ASSERT(__n < size(), "vector[] index out of bounds");
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::reference
vector<_Tp, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::const_reference
vector<_Tp, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return this->__begin_[__n];
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__n, size(), __a);
__swap_out_circular_buffer(__v);
}
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT
{
if (capacity() > size())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(size(), size(), __a);
__swap_out_circular_buffer(__v);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
}
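// Usage sketch (hypothetical values): shrink_to_fit() is a non-binding
// request, and the try/catch above deliberately swallows a failed
// reallocation, leaving the vector unchanged on failure.
//
//   // std::vector<int> v;
//   // v.reserve(1000);        // capacity() >= 1000
//   // v.assign({1, 2, 3});    // size() == 3, capacity unchanged
//   // v.shrink_to_fit();      // capacity() typically becomes 3, though the
//   //                         // standard does not require any reduction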
template <class _Tp, class _Allocator>
template <class _Up>
void
#ifndef _LIBCPP_CXX03_LANG
vector<_Tp, _Allocator>::__push_back_slow_path(_Up&& __x)
#else
vector<_Tp, _Allocator>::__push_back_slow_path(_Up& __x)
#endif
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), __a);
// __v.push_back(_VSTD::forward<_Up>(__x));
__alloc_traits::construct(__a, _VSTD::__to_address(__v.__end_), _VSTD::forward<_Up>(__x));
__v.__end_++;
__swap_out_circular_buffer(__v);
}
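// Note on the slow path above: the new element is constructed into the fresh
// __split_buffer *before* the existing elements are relocated, so if that
// construction throws, *this is left untouched (strong guarantee). Combined
// with __recommend()'s doubling this keeps push_back amortized O(1); e.g.
// pushing 1000 elements into an empty vector reallocates only about
// log2(1000) ~ 10 times (capacities 1, 2, 4, ..., 1024).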
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::push_back(const_reference __x)
{
if (this->__end_ != this->__end_cap())
{
__construct_one_at_end(__x);
}
else
__push_back_slow_path(__x);
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::push_back(value_type&& __x)
{
if (this->__end_ < this->__end_cap())
{
__construct_one_at_end(_VSTD::move(__x));
}
else
__push_back_slow_path(_VSTD::move(__x));
}
template <class _Tp, class _Allocator>
template <class... _Args>
void
vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args)
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), __a);
// __v.emplace_back(_VSTD::forward<_Args>(__args)...);
__alloc_traits::construct(__a, _VSTD::__to_address(__v.__end_), _VSTD::forward<_Args>(__args)...);
__v.__end_++;
__swap_out_circular_buffer(__v);
}
template <class _Tp, class _Allocator>
template <class... _Args>
inline
#if _LIBCPP_STD_VER > 14
typename vector<_Tp, _Allocator>::reference
#else
void
#endif
vector<_Tp, _Allocator>::emplace_back(_Args&&... __args)
{
if (this->__end_ < this->__end_cap())
{
__construct_one_at_end(_VSTD::forward<_Args>(__args)...);
}
else
__emplace_back_slow_path(_VSTD::forward<_Args>(__args)...);
#if _LIBCPP_STD_VER > 14
return this->back();
#endif
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
inline
void
vector<_Tp, _Allocator>::pop_back()
{
_LIBCPP_ASSERT(!empty(), "vector::pop_back called on an empty vector");
this->__destruct_at_end(this->__end_ - 1);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __position)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::erase(iterator) called with an iterator not"
" referring to this vector");
#endif
_LIBCPP_ASSERT(__position != end(),
"vector::erase(iterator) called with a non-dereferenceable iterator");
difference_type __ps = __position - cbegin();
pointer __p = this->__begin_ + __ps;
this->__destruct_at_end(_VSTD::move(__p + 1, this->__end_, __p));
this->__invalidate_iterators_past(__p-1);
iterator __r = __make_iter(__p);
return __r;
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
"vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
"vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
#endif
_LIBCPP_ASSERT(__first <= __last, "vector::erase(first, last) called with invalid range");
pointer __p = this->__begin_ + (__first - begin());
if (__first != __last) {
this->__destruct_at_end(_VSTD::move(__p + (__last - __first), this->__end_, __p));
this->__invalidate_iterators_past(__p - 1);
}
iterator __r = __make_iter(__p);
return __r;
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to)
{
pointer __old_last = this->__end_;
difference_type __n = __old_last - __to;
{
pointer __i = __from_s + __n;
_ConstructTransaction __tx(*this, __from_e - __i);
for (pointer __pos = __tx.__pos_; __i < __from_e;
++__i, ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_address(__pos),
_VSTD::move(*__i));
}
}
_VSTD::move_backward(__from_s, __from_s + __n, __old_last);
}
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(__x);
}
else
{
__move_range(__p, this->__end_, __p + 1);
const_pointer __xr = pointer_traits<const_pointer>::pointer_to(__x);
if (__p <= __xr && __xr < this->__end_)
++__xr;
*__p = *__xr;
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.push_back(__x);
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
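// The __xr adjustment above handles self-insertion, where __x aliases an
// element of this vector that __move_range has just shifted one slot to the
// right. A hypothetical case:
//
//   // std::vector<int> v = {10, 20, 30};
//   // v.reserve(4);               // guarantee the in-place branch is taken
//   // v.insert(v.begin(), v[1]);  // &v[1] lies inside [__p, __end_)
//   // // result: {20, 10, 20, 30}; the value is read from its shifted
//   // // position instead of the now-moved-from original slot.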
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(_VSTD::move(__x));
}
else
{
__move_range(__p, this->__end_, __p + 1);
*__p = _VSTD::move(__x);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.push_back(_VSTD::move(__x));
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
template <class... _Args>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::emplace(iterator, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (this->__end_ < this->__end_cap())
{
if (__p == this->__end_)
{
__construct_one_at_end(_VSTD::forward<_Args>(__args)...);
}
else
{
__temp_value<value_type, _Allocator> __tmp(this->__alloc(), _VSTD::forward<_Args>(__args)...);
__move_range(__p, this->__end_, __p + 1);
*__p = _VSTD::move(__tmp.get());
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), __p - this->__begin_, __a);
__v.emplace_back(_VSTD::forward<_Args>(__args)...);
__p = __swap_out_circular_buffer(__v, __p);
}
return __make_iter(__p);
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Tp, class _Allocator>
typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, n, x) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
if (__n > 0)
{
if (__n <= static_cast<size_type>(this->__end_cap() - this->__end_))
{
size_type __old_n = __n;
pointer __old_last = this->__end_;
if (__n > static_cast<size_type>(this->__end_ - __p))
{
size_type __cx = __n - (this->__end_ - __p);
__construct_at_end(__cx, __x);
__n -= __cx;
}
if (__n > 0)
{
__move_range(__p, __old_last, __p + __old_n);
const_pointer __xr = pointer_traits<const_pointer>::pointer_to(__x);
if (__p <= __xr && __xr < this->__end_)
__xr += __old_n;
_VSTD::fill_n(__p, __n, *__xr);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, __a);
__v.__construct_at_end(__n, __x);
__p = __swap_out_circular_buffer(__v, __p);
}
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_InputIterator>::reference>::value,
typename vector<_Tp, _Allocator>::iterator
>::type
vector<_Tp, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
difference_type __off = __position - begin();
pointer __p = this->__begin_ + __off;
allocator_type& __a = this->__alloc();
pointer __old_last = this->__end_;
for (; this->__end_ != this->__end_cap() && __first != __last; ++__first)
{
__construct_one_at_end(*__first);
}
__split_buffer<value_type, allocator_type&> __v(__a);
if (__first != __last)
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__v.__construct_at_end(__first, __last);
difference_type __old_size = __old_last - this->__begin_;
difference_type __old_p = __p - this->__begin_;
reserve(__recommend(size() + __v.size()));
__p = this->__begin_ + __old_p;
__old_last = this->__begin_ + __old_size;
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
erase(__make_iter(__old_last), end());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
__p = _VSTD::rotate(__p, __old_last, this->__end_);
insert(__make_iter(__p), _VSTD::make_move_iterator(__v.begin()),
_VSTD::make_move_iterator(__v.end()));
return begin() + __off;
}
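// Strategy note for the overload above: an input iterator can be traversed
// only once and its length is unknown, so the new elements are first
// appended — into spare capacity while it lasts, then into the temporary
// __split_buffer __v — and _VSTD::rotate slides the appended block from the
// end of the vector to the requested position before the buffered remainder
// is re-inserted. A hypothetical trace with enough spare capacity:
//
//   // v == {a, b, c, d}, insert {x, y} before b:
//   //   after the append step:  {a, b, c, d, x, y}
//   //   after the rotate step:  {a, x, y, b, c, d}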
template <class _Tp, class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value &&
is_constructible<
_Tp,
typename iterator_traits<_ForwardIterator>::reference>::value,
typename vector<_Tp, _Allocator>::iterator
>::type
vector<_Tp, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
_LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
pointer __p = this->__begin_ + (__position - begin());
difference_type __n = _VSTD::distance(__first, __last);
if (__n > 0)
{
if (__n <= this->__end_cap() - this->__end_)
{
size_type __old_n = __n;
pointer __old_last = this->__end_;
_ForwardIterator __m = __last;
difference_type __dx = this->__end_ - __p;
if (__n > __dx)
{
__m = __first;
difference_type __diff = this->__end_ - __p;
_VSTD::advance(__m, __diff);
__construct_at_end(__m, __last, __n - __diff);
__n = __dx;
}
if (__n > 0)
{
__move_range(__p, __old_last, __p + __old_n);
_VSTD::copy(__first, __m, __p);
}
}
else
{
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__recommend(size() + __n), __p - this->__begin_, __a);
__v.__construct_at_end(__first, __last);
__p = __swap_out_circular_buffer(__v, __p);
}
}
return __make_iter(__p);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::resize(size_type __sz)
{
size_type __cs = size();
if (__cs < __sz)
this->__append(__sz - __cs);
else if (__cs > __sz)
this->__destruct_at_end(this->__begin_ + __sz);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::resize(size_type __sz, const_reference __x)
{
size_type __cs = size();
if (__cs < __sz)
this->__append(__sz - __cs, __x);
else if (__cs > __sz)
this->__destruct_at_end(this->__begin_ + __sz);
}
template <class _Tp, class _Allocator>
void
vector<_Tp, _Allocator>::swap(vector& __x)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
_LIBCPP_ASSERT(__alloc_traits::propagate_on_container_swap::value ||
this->__alloc() == __x.__alloc(),
"vector::swap: Either propagate_on_container_swap must be true"
" or the allocators must compare equal");
_VSTD::swap(this->__begin_, __x.__begin_);
_VSTD::swap(this->__end_, __x.__end_);
_VSTD::swap(this->__end_cap(), __x.__end_cap());
_VSTD::__swap_allocator(this->__alloc(), __x.__alloc(),
integral_constant<bool,__alloc_traits::propagate_on_container_swap::value>());
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->swap(this, &__x);
#endif
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__invariants() const
{
if (this->__begin_ == nullptr)
{
if (this->__end_ != nullptr || this->__end_cap() != nullptr)
return false;
}
else
{
if (this->__begin_ > this->__end_)
return false;
if (this->__begin_ == this->__end_cap())
return false;
if (this->__end_ > this->__end_cap())
return false;
}
return true;
}
#if _LIBCPP_DEBUG_LEVEL == 2
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__dereferenceable(const const_iterator* __i) const
{
return this->__begin_ <= __i->base() && __i->base() < this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__decrementable(const const_iterator* __i) const
{
return this->__begin_ < __i->base() && __i->base() <= this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
{
const_pointer __p = __i->base() + __n;
return this->__begin_ <= __p && __p <= this->__end_;
}
template <class _Tp, class _Allocator>
bool
vector<_Tp, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
{
const_pointer __p = __i->base() + __n;
return this->__begin_ <= __p && __p < this->__end_;
}
#endif // _LIBCPP_DEBUG_LEVEL == 2
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::__invalidate_all_iterators()
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__invalidate_all(this);
#endif
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<_Tp, _Allocator>::__invalidate_iterators_past(pointer __new_last) {
#if _LIBCPP_DEBUG_LEVEL == 2
__c_node* __c = __get_db()->__find_c_and_lock(this);
for (__i_node** __p = __c->end_; __p != __c->beg_; ) {
--__p;
const_iterator* __i = static_cast<const_iterator*>((*__p)->__i_);
if (__i->base() > __new_last) {
(*__p)->__c_ = nullptr;
if (--__c->end_ != __p)
_VSTD::memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*));
}
}
__get_db()->unlock();
#else
((void)__new_last);
#endif
}
// vector<bool>
template <class _Allocator> class vector<bool, _Allocator>;
template <class _Allocator> struct hash<vector<bool, _Allocator> >;
template <class _Allocator>
struct __has_storage_type<vector<bool, _Allocator> >
{
static const bool value = true;
};
template <class _Allocator>
class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator>
: private __vector_base_common<true>
{
public:
typedef vector __self;
typedef bool value_type;
typedef _Allocator allocator_type;
typedef allocator_traits<allocator_type> __alloc_traits;
typedef typename __alloc_traits::size_type size_type;
typedef typename __alloc_traits::difference_type difference_type;
typedef size_type __storage_type;
typedef __bit_iterator<vector, false> pointer;
typedef __bit_iterator<vector, true> const_pointer;
typedef pointer iterator;
typedef const_pointer const_iterator;
typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
private:
typedef typename __rebind_alloc_helper<__alloc_traits, __storage_type>::type __storage_allocator;
typedef allocator_traits<__storage_allocator> __storage_traits;
typedef typename __storage_traits::pointer __storage_pointer;
typedef typename __storage_traits::const_pointer __const_storage_pointer;
__storage_pointer __begin_;
size_type __size_;
__compressed_pair<size_type, __storage_allocator> __cap_alloc_;
public:
typedef __bit_reference<vector> reference;
typedef __bit_const_reference<vector> const_reference;
private:
_LIBCPP_INLINE_VISIBILITY
size_type& __cap() _NOEXCEPT
{return __cap_alloc_.first();}
_LIBCPP_INLINE_VISIBILITY
const size_type& __cap() const _NOEXCEPT
{return __cap_alloc_.first();}
_LIBCPP_INLINE_VISIBILITY
__storage_allocator& __alloc() _NOEXCEPT
{return __cap_alloc_.second();}
_LIBCPP_INLINE_VISIBILITY
const __storage_allocator& __alloc() const _NOEXCEPT
{return __cap_alloc_.second();}
static const unsigned __bits_per_word = static_cast<unsigned>(sizeof(__storage_type) * CHAR_BIT);
_LIBCPP_INLINE_VISIBILITY
static size_type __internal_cap_to_external(size_type __n) _NOEXCEPT
{return __n * __bits_per_word;}
_LIBCPP_INLINE_VISIBILITY
static size_type __external_cap_to_internal(size_type __n) _NOEXCEPT
{return (__n - 1) / __bits_per_word + 1;}
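// Worked example of the capacity conversions (assuming a 64-bit
// __storage_type; the real width is sizeof(__storage_type) * CHAR_BIT):
//
//   __external_cap_to_internal(1)   == (1 - 1) / 64 + 1   == 1 word
//   __external_cap_to_internal(64)  == (64 - 1) / 64 + 1  == 1 word
//   __external_cap_to_internal(65)  == (65 - 1) / 64 + 1  == 2 words
//   __internal_cap_to_external(2)   == 2 * 64             == 128 bits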
public:
_LIBCPP_INLINE_VISIBILITY
vector() _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY explicit vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value);
#else
_NOEXCEPT;
#endif
~vector();
explicit vector(size_type __n);
#if _LIBCPP_STD_VER > 11
explicit vector(size_type __n, const allocator_type& __a);
#endif
vector(size_type __n, const value_type& __v);
vector(size_type __n, const value_type& __v, const allocator_type& __a);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0);
template <class _InputIterator>
vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0);
template <class _ForwardIterator>
vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type* = 0);
vector(const vector& __v);
vector(const vector& __v, const allocator_type& __a);
vector& operator=(const vector& __v);
#ifndef _LIBCPP_CXX03_LANG
vector(initializer_list<value_type> __il);
vector(initializer_list<value_type> __il, const allocator_type& __a);
_LIBCPP_INLINE_VISIBILITY
vector(vector&& __v)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT;
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value);
#endif
vector(vector&& __v, const __identity_t<allocator_type>& __a);
_LIBCPP_INLINE_VISIBILITY
vector& operator=(vector&& __v)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value));
_LIBCPP_INLINE_VISIBILITY
vector& operator=(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end()); return *this;}
#endif // !_LIBCPP_CXX03_LANG
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator<_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
void
>::type
assign(_InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
assign(_ForwardIterator __first, _ForwardIterator __last);
void assign(size_type __n, const value_type& __x);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
void assign(initializer_list<value_type> __il)
{assign(__il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY allocator_type get_allocator() const _NOEXCEPT
{return allocator_type(this->__alloc());}
size_type max_size() const _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
{return __internal_cap_to_external(__cap());}
_LIBCPP_INLINE_VISIBILITY
size_type size() const _NOEXCEPT
{return __size_;}
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT
{return __size_ == 0;}
void reserve(size_type __n);
void shrink_to_fit() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
iterator begin() _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
const_iterator begin() const _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
iterator end() _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
const_iterator end() const _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rbegin() _NOEXCEPT
{return reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rbegin() const _NOEXCEPT
{return const_reverse_iterator(end());}
_LIBCPP_INLINE_VISIBILITY
reverse_iterator rend() _NOEXCEPT
{return reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator rend() const _NOEXCEPT
{return const_reverse_iterator(begin());}
_LIBCPP_INLINE_VISIBILITY
const_iterator cbegin() const _NOEXCEPT
{return __make_iter(0);}
_LIBCPP_INLINE_VISIBILITY
const_iterator cend() const _NOEXCEPT
{return __make_iter(__size_);}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crbegin() const _NOEXCEPT
{return rbegin();}
_LIBCPP_INLINE_VISIBILITY
const_reverse_iterator crend() const _NOEXCEPT
{return rend();}
_LIBCPP_INLINE_VISIBILITY reference operator[](size_type __n) {return __make_ref(__n);}
_LIBCPP_INLINE_VISIBILITY const_reference operator[](size_type __n) const {return __make_ref(__n);}
reference at(size_type __n);
const_reference at(size_type __n) const;
_LIBCPP_INLINE_VISIBILITY reference front() {return __make_ref(0);}
_LIBCPP_INLINE_VISIBILITY const_reference front() const {return __make_ref(0);}
_LIBCPP_INLINE_VISIBILITY reference back() {return __make_ref(__size_ - 1);}
_LIBCPP_INLINE_VISIBILITY const_reference back() const {return __make_ref(__size_ - 1);}
void push_back(const value_type& __x);
#if _LIBCPP_STD_VER > 11
template <class... _Args>
#if _LIBCPP_STD_VER > 14
_LIBCPP_INLINE_VISIBILITY reference emplace_back(_Args&&... __args)
#else
_LIBCPP_INLINE_VISIBILITY void emplace_back(_Args&&... __args)
#endif
{
push_back(value_type(_VSTD::forward<_Args>(__args)...));
#if _LIBCPP_STD_VER > 14
return this->back();
#endif
}
#endif
_LIBCPP_INLINE_VISIBILITY void pop_back() {--__size_;}
#if _LIBCPP_STD_VER > 11
template <class... _Args>
_LIBCPP_INLINE_VISIBILITY iterator emplace(const_iterator __position, _Args&&... __args)
{ return insert(__position, value_type(_VSTD::forward<_Args>(__args)...)); }
#endif
iterator insert(const_iterator __position, const value_type& __x);
iterator insert(const_iterator __position, size_type __n, const value_type& __x);
iterator insert(const_iterator __position, size_type __n, const_reference __x);
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
iterator
>::type
insert(const_iterator __position, _InputIterator __first, _InputIterator __last);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
iterator
>::type
insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last);
#ifndef _LIBCPP_CXX03_LANG
_LIBCPP_INLINE_VISIBILITY
iterator insert(const_iterator __position, initializer_list<value_type> __il)
{return insert(__position, __il.begin(), __il.end());}
#endif
_LIBCPP_INLINE_VISIBILITY iterator erase(const_iterator __position);
iterator erase(const_iterator __first, const_iterator __last);
_LIBCPP_INLINE_VISIBILITY
void clear() _NOEXCEPT {__size_ = 0;}
void swap(vector&)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT;
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value);
#endif
static void swap(reference __x, reference __y) _NOEXCEPT { _VSTD::swap(__x, __y); }
void resize(size_type __sz, value_type __x = false);
void flip() _NOEXCEPT;
bool __invariants() const;
private:
_LIBCPP_INLINE_VISIBILITY void __invalidate_all_iterators();
void __vallocate(size_type __n);
void __vdeallocate() _NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
static size_type __align_it(size_type __new_size) _NOEXCEPT
{return __new_size + (__bits_per_word-1) & ~((size_type)__bits_per_word-1);}
_LIBCPP_INLINE_VISIBILITY size_type __recommend(size_type __new_size) const;
_LIBCPP_INLINE_VISIBILITY void __construct_at_end(size_type __n, bool __x);
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
__construct_at_end(_ForwardIterator __first, _ForwardIterator __last);
void __append(size_type __n, const_reference __x);
_LIBCPP_INLINE_VISIBILITY
reference __make_ref(size_type __pos) _NOEXCEPT
{return reference(__begin_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);}
_LIBCPP_INLINE_VISIBILITY
const_reference __make_ref(size_type __pos) const _NOEXCEPT
{return const_reference(__begin_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);}
_LIBCPP_INLINE_VISIBILITY
iterator __make_iter(size_type __pos) _NOEXCEPT
{return iterator(__begin_ + __pos / __bits_per_word, static_cast<unsigned>(__pos % __bits_per_word));}
_LIBCPP_INLINE_VISIBILITY
const_iterator __make_iter(size_type __pos) const _NOEXCEPT
{return const_iterator(__begin_ + __pos / __bits_per_word, static_cast<unsigned>(__pos % __bits_per_word));}
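// Bit-addressing sketch (hypothetical position, again assuming 64 bits per
// word): element __pos == 70 lives in word 70 / 64 == 1 at bit 70 % 64 == 6,
// so __make_ref(70) pairs the word pointer __begin_ + 1 with the mask
// __storage_type(1) << 6, and __make_iter(70) stores the same word pointer
// together with the bit offset 6.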
_LIBCPP_INLINE_VISIBILITY
iterator __const_iterator_cast(const_iterator __p) _NOEXCEPT
{return begin() + (__p - cbegin());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector& __v)
{__copy_assign_alloc(__v, integral_constant<bool,
__storage_traits::propagate_on_container_copy_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector& __c, true_type)
{
if (__alloc() != __c.__alloc())
__vdeallocate();
__alloc() = __c.__alloc();
}
_LIBCPP_INLINE_VISIBILITY
void __copy_assign_alloc(const vector&, false_type)
{}
void __move_assign(vector& __c, false_type);
void __move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value);
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector& __c)
_NOEXCEPT_(
!__storage_traits::propagate_on_container_move_assignment::value ||
is_nothrow_move_assignable<allocator_type>::value)
{__move_assign_alloc(__c, integral_constant<bool,
__storage_traits::propagate_on_container_move_assignment::value>());}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__alloc() = _VSTD::move(__c.__alloc());
}
_LIBCPP_INLINE_VISIBILITY
void __move_assign_alloc(vector&, false_type)
_NOEXCEPT
{}
size_t __hash_code() const _NOEXCEPT;
friend class __bit_reference<vector>;
friend class __bit_const_reference<vector>;
friend class __bit_iterator<vector, false>;
friend class __bit_iterator<vector, true>;
friend struct __bit_array<vector>;
friend struct _LIBCPP_TEMPLATE_VIS hash<vector>;
};
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<bool, _Allocator>::__invalidate_all_iterators()
{
}
// Allocate space for __n objects
// throws length_error if __n > max_size()
// throws (probably bad_alloc) if memory runs out
// Precondition: __begin_ == __end_ == __cap() == 0
// Precondition: __n > 0
// Postcondition: capacity() == __n
// Postcondition: size() == 0
template <class _Allocator>
void
vector<bool, _Allocator>::__vallocate(size_type __n)
{
if (__n > max_size())
this->__throw_length_error();
__n = __external_cap_to_internal(__n);
this->__begin_ = __storage_traits::allocate(this->__alloc(), __n);
this->__size_ = 0;
this->__cap() = __n;
}
template <class _Allocator>
void
vector<bool, _Allocator>::__vdeallocate() _NOEXCEPT
{
if (this->__begin_ != nullptr)
{
__storage_traits::deallocate(this->__alloc(), this->__begin_, __cap());
__invalidate_all_iterators();
this->__begin_ = nullptr;
this->__size_ = this->__cap() = 0;
}
}
template <class _Allocator>
typename vector<bool, _Allocator>::size_type
vector<bool, _Allocator>::max_size() const _NOEXCEPT
{
size_type __amax = __storage_traits::max_size(__alloc());
size_type __nmax = numeric_limits<size_type>::max() / 2; // end() >= begin(), always
if (__nmax / __bits_per_word <= __amax)
return __nmax;
return __internal_cap_to_external(__amax);
}
// Precondition: __new_size > capacity()
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<bool, _Allocator>::size_type
vector<bool, _Allocator>::__recommend(size_type __new_size) const
{
const size_type __ms = max_size();
if (__new_size > __ms)
this->__throw_length_error();
const size_type __cap = capacity();
if (__cap >= __ms / 2)
return __ms;
return _VSTD::max(2 * __cap, __align_it(__new_size));
}
// Constructs __n objects with value __x starting at __end_
// Precondition: __n > 0
// Precondition: size() + __n <= capacity()
// Postcondition: size() == previous size() + __n
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
vector<bool, _Allocator>::__construct_at_end(size_type __n, bool __x)
{
size_type __old_size = this->__size_;
this->__size_ += __n;
if (__old_size == 0 || ((__old_size - 1) / __bits_per_word) != ((this->__size_ - 1) / __bits_per_word))
{
if (this->__size_ <= __bits_per_word)
this->__begin_[0] = __storage_type(0);
else
this->__begin_[(this->__size_ - 1) / __bits_per_word] = __storage_type(0);
}
_VSTD::fill_n(__make_iter(__old_size), __n, __x);
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<bool, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last)
{
size_type __old_size = this->__size_;
this->__size_ += _VSTD::distance(__first, __last);
if (__old_size == 0 || ((__old_size - 1) / __bits_per_word) != ((this->__size_ - 1) / __bits_per_word))
{
if (this->__size_ <= __bits_per_word)
this->__begin_[0] = __storage_type(0);
else
this->__begin_[(this->__size_ - 1) / __bits_per_word] = __storage_type(0);
}
_VSTD::copy(__first, __last, __make_iter(__old_size));
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>::vector()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>::vector(const allocator_type& __a)
#if _LIBCPP_STD_VER <= 14
_NOEXCEPT_(is_nothrow_copy_constructible<allocator_type>::value)
#else
_NOEXCEPT
#endif
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
}
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, false);
}
}
#if _LIBCPP_STD_VER > 11
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, false);
}
}
#endif
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const value_type& __x)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__n, __x);
}
}
template <class _Allocator>
template <class _InputIterator>
vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _Allocator>
template <class _InputIterator>
vector<bool, _Allocator>::vector(_InputIterator __first, _InputIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
for (; __first != __last; ++__first)
push_back(*__first);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
template <class _Allocator>
template <class _ForwardIterator>
vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last);
}
}
template <class _Allocator>
template <class _ForwardIterator>
vector<bool, _Allocator>::vector(_ForwardIterator __first, _ForwardIterator __last, const allocator_type& __a,
typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value>::type*)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
size_type __n = static_cast<size_type>(_VSTD::distance(__first, __last));
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__first, __last);
}
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Allocator>
vector<bool, _Allocator>::vector(initializer_list<value_type> __il)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __default_init_tag())
{
size_type __n = static_cast<size_type>(__il.size());
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__il.begin(), __il.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, static_cast<__storage_allocator>(__a))
{
size_type __n = static_cast<size_type>(__il.size());
if (__n > 0)
{
__vallocate(__n);
__construct_at_end(__il.begin(), __il.end());
}
}
#endif // _LIBCPP_CXX03_LANG
template <class _Allocator>
vector<bool, _Allocator>::~vector()
{
if (__begin_ != nullptr)
__storage_traits::deallocate(__alloc(), __begin_, __cap());
__invalidate_all_iterators();
}
template <class _Allocator>
vector<bool, _Allocator>::vector(const vector& __v)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __storage_traits::select_on_container_copy_construction(__v.__alloc()))
{
if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>::vector(const vector& __v, const allocator_type& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __a)
{
if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
vector<bool, _Allocator>&
vector<bool, _Allocator>::operator=(const vector& __v)
{
if (this != &__v)
{
__copy_assign_alloc(__v);
if (__v.__size_)
{
if (__v.__size_ > capacity())
{
__vdeallocate();
__vallocate(__v.__size_);
}
_VSTD::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
}
__size_ = __v.__size_;
}
return *this;
}
#ifndef _LIBCPP_CXX03_LANG
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY vector<bool, _Allocator>::vector(vector&& __v)
#if _LIBCPP_STD_VER > 14
_NOEXCEPT
#else
_NOEXCEPT_(is_nothrow_move_constructible<allocator_type>::value)
#endif
: __begin_(__v.__begin_),
__size_(__v.__size_),
__cap_alloc_(_VSTD::move(__v.__cap_alloc_)) {
__v.__begin_ = nullptr;
__v.__size_ = 0;
__v.__cap() = 0;
}
template <class _Allocator>
vector<bool, _Allocator>::vector(vector&& __v, const __identity_t<allocator_type>& __a)
: __begin_(nullptr),
__size_(0),
__cap_alloc_(0, __a)
{
if (__a == allocator_type(__v.__alloc()))
{
this->__begin_ = __v.__begin_;
this->__size_ = __v.__size_;
this->__cap() = __v.__cap();
__v.__begin_ = nullptr;
__v.__cap() = __v.__size_ = 0;
}
else if (__v.size() > 0)
{
__vallocate(__v.size());
__construct_at_end(__v.begin(), __v.end());
}
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
vector<bool, _Allocator>&
vector<bool, _Allocator>::operator=(vector&& __v)
_NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value))
{
__move_assign(__v, integral_constant<bool,
__storage_traits::propagate_on_container_move_assignment::value>());
return *this;
}
template <class _Allocator>
void
vector<bool, _Allocator>::__move_assign(vector& __c, false_type)
{
if (__alloc() != __c.__alloc())
assign(__c.begin(), __c.end());
else
__move_assign(__c, true_type());
}
template <class _Allocator>
void
vector<bool, _Allocator>::__move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__vdeallocate();
__move_assign_alloc(__c);
this->__begin_ = __c.__begin_;
this->__size_ = __c.__size_;
this->__cap() = __c.__cap();
__c.__begin_ = nullptr;
__c.__cap() = __c.__size_ = 0;
}
#endif // !_LIBCPP_CXX03_LANG
template <class _Allocator>
void
vector<bool, _Allocator>::assign(size_type __n, const value_type& __x)
{
__size_ = 0;
if (__n > 0)
{
size_type __c = capacity();
if (__n <= __c)
__size_ = __n;
else
{
vector __v(__alloc());
__v.reserve(__recommend(__n));
__v.__size_ = __n;
swap(__v);
}
_VSTD::fill_n(begin(), __n, __x);
}
__invalidate_all_iterators();
}
template <class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator<_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
void
>::type
vector<bool, _Allocator>::assign(_InputIterator __first, _InputIterator __last)
{
clear();
for (; __first != __last; ++__first)
push_back(*__first);
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
void
>::type
vector<bool, _Allocator>::assign(_ForwardIterator __first, _ForwardIterator __last)
{
clear();
difference_type __ns = _VSTD::distance(__first, __last);
_LIBCPP_ASSERT(__ns >= 0, "invalid range specified");
const size_t __n = static_cast<size_type>(__ns);
if (__n)
{
if (__n > capacity())
{
__vdeallocate();
__vallocate(__n);
}
__construct_at_end(__first, __last);
}
}
template <class _Allocator>
void
vector<bool, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
vector __v(this->__alloc());
__v.__vallocate(__n);
__v.__construct_at_end(this->begin(), this->end());
swap(__v);
__invalidate_all_iterators();
}
}
template <class _Allocator>
void
vector<bool, _Allocator>::shrink_to_fit() _NOEXCEPT
{
if (__external_cap_to_internal(size()) > __cap())
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
vector(*this, allocator_type(__alloc())).swap(*this);
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
}
template <class _Allocator>
typename vector<bool, _Allocator>::reference
vector<bool, _Allocator>::at(size_type __n)
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _Allocator>
typename vector<bool, _Allocator>::const_reference
vector<bool, _Allocator>::at(size_type __n) const
{
if (__n >= size())
this->__throw_out_of_range();
return (*this)[__n];
}
template <class _Allocator>
void
vector<bool, _Allocator>::push_back(const value_type& __x)
{
if (this->__size_ == this->capacity())
reserve(__recommend(this->__size_ + 1));
++this->__size_;
back() = __x;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::insert(const_iterator __position, const value_type& __x)
{
iterator __r;
if (size() < capacity())
{
const_iterator __old_end = end();
++__size_;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + 1));
__v.__size_ = __size_ + 1;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
*__r = __x;
return __r;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::insert(const_iterator __position, size_type __n, const value_type& __x)
{
iterator __r;
size_type __c = capacity();
if (__n <= __c && size() <= __c - __n)
{
const_iterator __old_end = end();
__size_ += __n;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
_VSTD::fill_n(__r, __n, __x);
return __r;
}
template <class _Allocator>
template <class _InputIterator>
typename enable_if
<
__is_cpp17_input_iterator <_InputIterator>::value &&
!__is_cpp17_forward_iterator<_InputIterator>::value,
typename vector<bool, _Allocator>::iterator
>::type
vector<bool, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last)
{
difference_type __off = __position - begin();
iterator __p = __const_iterator_cast(__position);
iterator __old_end = end();
for (; size() != capacity() && __first != __last; ++__first)
{
++this->__size_;
back() = *__first;
}
vector __v(__alloc());
if (__first != __last)
{
#ifndef _LIBCPP_NO_EXCEPTIONS
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
__v.assign(__first, __last);
difference_type __old_size = static_cast<difference_type>(__old_end - begin());
difference_type __old_p = __p - begin();
reserve(__recommend(size() + __v.size()));
__p = begin() + __old_p;
__old_end = begin() + __old_size;
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
{
erase(__old_end, end());
throw;
}
#endif // _LIBCPP_NO_EXCEPTIONS
}
__p = _VSTD::rotate(__p, __old_end, end());
insert(__p, __v.begin(), __v.end());
return begin() + __off;
}
template <class _Allocator>
template <class _ForwardIterator>
typename enable_if
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
typename vector<bool, _Allocator>::iterator
>::type
vector<bool, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last)
{
const difference_type __n_signed = _VSTD::distance(__first, __last);
_LIBCPP_ASSERT(__n_signed >= 0, "invalid range specified");
const size_type __n = static_cast<size_type>(__n_signed);
iterator __r;
size_type __c = capacity();
if (__n <= __c && size() <= __c - __n)
{
const_iterator __old_end = end();
__size_ += __n;
_VSTD::copy_backward(__position, __old_end, end());
__r = __const_iterator_cast(__position);
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
_VSTD::copy_backward(__position, cend(), __v.end());
swap(__v);
}
_VSTD::copy(__first, __last, __r);
return __r;
}
template <class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::erase(const_iterator __position)
{
iterator __r = __const_iterator_cast(__position);
_VSTD::copy(__position + 1, this->cend(), __r);
--__size_;
return __r;
}
template <class _Allocator>
typename vector<bool, _Allocator>::iterator
vector<bool, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
iterator __r = __const_iterator_cast(__first);
difference_type __d = __last - __first;
_VSTD::copy(__last, this->cend(), __r);
__size_ -= __d;
return __r;
}
template <class _Allocator>
void
vector<bool, _Allocator>::swap(vector& __x)
#if _LIBCPP_STD_VER >= 14
_NOEXCEPT
#else
_NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value ||
__is_nothrow_swappable<allocator_type>::value)
#endif
{
_VSTD::swap(this->__begin_, __x.__begin_);
_VSTD::swap(this->__size_, __x.__size_);
_VSTD::swap(this->__cap(), __x.__cap());
_VSTD::__swap_allocator(this->__alloc(), __x.__alloc(),
integral_constant<bool, __alloc_traits::propagate_on_container_swap::value>());
}
template <class _Allocator>
void
vector<bool, _Allocator>::resize(size_type __sz, value_type __x)
{
size_type __cs = size();
if (__cs < __sz)
{
iterator __r;
size_type __c = capacity();
size_type __n = __sz - __cs;
if (__n <= __c && __cs <= __c - __n)
{
__r = end();
__size_ += __n;
}
else
{
vector __v(__alloc());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), cend(), __v.begin());
swap(__v);
}
_VSTD::fill_n(__r, __n, __x);
}
else
__size_ = __sz;
}
template <class _Allocator>
void
vector<bool, _Allocator>::flip() _NOEXCEPT
{
// do middle whole words
size_type __n = __size_;
__storage_pointer __p = __begin_;
for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word)
*__p = ~*__p;
// do last partial word
if (__n > 0)
{
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__storage_type __b = *__p & __m;
*__p &= ~__m;
*__p |= ~__b & __m;
}
}
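// A minimal usage sketch of flip() (hypothetical example, assumes <cassert>):
//
//   std::vector<bool> v = {true, false, true};
//   v.flip();                                    // inverts every bit in place
//   assert(!v[0] && v[1] && !v[2]);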
template <class _Allocator>
bool
vector<bool, _Allocator>::__invariants() const
{
if (this->__begin_ == nullptr)
{
if (this->__size_ != 0 || this->__cap() != 0)
return false;
}
else
{
if (this->__cap() == 0)
return false;
if (this->__size_ > this->capacity())
return false;
}
return true;
}
template <class _Allocator>
size_t
vector<bool, _Allocator>::__hash_code() const _NOEXCEPT
{
size_t __h = 0;
// do middle whole words
size_type __n = __size_;
__storage_pointer __p = __begin_;
for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word)
__h ^= *__p;
// do last partial word
if (__n > 0)
{
const __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
__h ^= *__p & __m;
}
return __h;
}
template <class _Allocator>
struct _LIBCPP_TEMPLATE_VIS hash<vector<bool, _Allocator> >
: public unary_function<vector<bool, _Allocator>, size_t>
{
_LIBCPP_INLINE_VISIBILITY
size_t operator()(const vector<bool, _Allocator>& __vec) const _NOEXCEPT
{return __vec.__hash_code();}
};
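// A minimal usage sketch of the hash specialization above (hypothetical example,
// assumes <unordered_map>): equal vectors hash to equal values, so vector<bool>
// can key an unordered container directly.
//
//   std::vector<bool> key = {true, true, false};
//   std::size_t h = std::hash<std::vector<bool> >{}(key);
//   std::unordered_map<std::vector<bool>, int> m;
//   m[key] = 42;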
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
const typename vector<_Tp, _Allocator>::size_type __sz = __x.size();
return __sz == __y.size() && _VSTD::equal(__x.begin(), __x.end(), __y.begin());
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__x == __y);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator< (const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return _VSTD::lexicographical_compare(__x.begin(), __x.end(), __y.begin(), __y.end());
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator> (const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return __y < __x;
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator>=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__x < __y);
}
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator<=(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y)
{
return !(__y < __x);
}
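// A minimal sketch of the relational operators above (hypothetical example):
// comparison is lexicographical, so the first differing element decides.
//
//   std::vector<int> a = {1, 2, 3};
//   std::vector<int> b = {1, 2, 4};
//   assert(a < b && a != b && b >= a);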
template <class _Tp, class _Allocator>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(vector<_Tp, _Allocator>& __x, vector<_Tp, _Allocator>& __y)
_NOEXCEPT_(_NOEXCEPT_(__x.swap(__y)))
{
__x.swap(__y);
}
#if _LIBCPP_STD_VER > 17
template <class _Tp, class _Allocator, class _Up>
inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type
erase(vector<_Tp, _Allocator>& __c, const _Up& __v) {
auto __old_size = __c.size();
__c.erase(_VSTD::remove(__c.begin(), __c.end(), __v), __c.end());
return __old_size - __c.size();
}
template <class _Tp, class _Allocator, class _Predicate>
inline _LIBCPP_INLINE_VISIBILITY typename vector<_Tp, _Allocator>::size_type
erase_if(vector<_Tp, _Allocator>& __c, _Predicate __pred) {
auto __old_size = __c.size();
__c.erase(_VSTD::remove_if(__c.begin(), __c.end(), __pred), __c.end());
return __old_size - __c.size();
}
#endif
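// A minimal usage sketch of the C++20 uniform erasure helpers above
// (hypothetical example, requires C++20):
//
//   std::vector<int> v = {1, 2, 3, 2, 4};
//   auto removed = std::erase(v, 2);                               // v == {1, 3, 4}
//   auto odd = std::erase_if(v, [](int x) { return x % 2 != 0; }); // v == {4}
//   assert(removed == 2 && odd == 2);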
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
#endif // _LIBCPP_VECTOR
diff --git a/contrib/llvm-project/libcxx/include/wctype.h b/contrib/llvm-project/libcxx/include/wctype.h
index 1b4b1461496c..3b614759ac6d 100644
--- a/contrib/llvm-project/libcxx/include/wctype.h
+++ b/contrib/llvm-project/libcxx/include/wctype.h
@@ -1,80 +1,90 @@
// -*- C++ -*-
//===--------------------------- wctype.h ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_WCTYPE_H
#define _LIBCPP_WCTYPE_H
/*
wctype.h synopsis
Macros:
WEOF
Types:
wint_t
wctrans_t
wctype_t
int iswalnum(wint_t wc);
int iswalpha(wint_t wc);
int iswblank(wint_t wc); // C99
int iswcntrl(wint_t wc);
int iswdigit(wint_t wc);
int iswgraph(wint_t wc);
int iswlower(wint_t wc);
int iswprint(wint_t wc);
int iswpunct(wint_t wc);
int iswspace(wint_t wc);
int iswupper(wint_t wc);
int iswxdigit(wint_t wc);
int iswctype(wint_t wc, wctype_t desc);
wctype_t wctype(const char* property);
wint_t towlower(wint_t wc);
wint_t towupper(wint_t wc);
wint_t towctrans(wint_t wc, wctrans_t desc);
wctrans_t wctrans(const char* property);
*/
#include <__config>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
+// TODO:
+// In the future, we should unconditionally include_next <wctype.h> here and instead
+// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype>
+// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely
+// bypass the using declarations in <cwctype> when we did not include <wctype.h>.
+// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to
+// nothing (with using_if_exists), and if we include another header that defines one
+// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
+// will fail because it does not refer to the same declaration.
#if __has_include_next(<wctype.h>)
# include_next <wctype.h>
+# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
#endif
#ifdef __cplusplus
#undef iswalnum
#undef iswalpha
#undef iswblank
#undef iswcntrl
#undef iswdigit
#undef iswgraph
#undef iswlower
#undef iswprint
#undef iswpunct
#undef iswspace
#undef iswupper
#undef iswxdigit
#undef iswctype
#undef wctype
#undef towlower
#undef towupper
#undef towctrans
#undef wctrans
#endif // __cplusplus
#endif // _LIBCPP_WCTYPE_H
diff --git a/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp b/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
index 32b5cbc3be92..8843db7f54c3 100644
--- a/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
+++ b/contrib/llvm-project/libunwind/src/Unwind-EHABI.cpp
@@ -1,1003 +1,1005 @@
//===--------------------------- Unwind-EHABI.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//
// Implements ARM zero-cost C++ exceptions
//
//===----------------------------------------------------------------------===//
#include "Unwind-EHABI.h"
#if defined(_LIBUNWIND_ARM_EHABI)
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "libunwind.h"
#include "libunwind_ext.h"
#include "unwind.h"
namespace {
// Strange order: take words in order, but inside each word, take from most to
// least significant byte.
uint8_t getByte(const uint32_t* data, size_t offset) {
const uint8_t* byteData = reinterpret_cast<const uint8_t*>(data);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))];
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return byteData[offset];
#else
#error "Unable to determine endianess"
#endif
}
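// A worked example of that ordering (illustrative, assuming 32-bit words): if
// data[0] holds 0xAABBCCDD, then regardless of host endianness
//
//   getByte(data, 0) == 0xAA   // most significant byte of the first word
//   getByte(data, 1) == 0xBB
//   getByte(data, 2) == 0xCC
//   getByte(data, 3) == 0xDD   // offset 4 continues with data[1]'s MSB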
const char* getNextWord(const char* data, uint32_t* out) {
*out = *reinterpret_cast<const uint32_t*>(data);
return data + 4;
}
const char* getNextNibble(const char* data, uint32_t* out) {
*out = *reinterpret_cast<const uint16_t*>(data);
return data + 2;
}
struct Descriptor {
// See # 9.2
typedef enum {
SU16 = 0, // Short descriptor, 16-bit entries
LU16 = 1, // Long descriptor, 16-bit entries
LU32 = 3, // Long descriptor, 32-bit entries
RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7,
RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11,
RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15
} Format;
// See # 9.2
typedef enum {
CLEANUP = 0x0,
FUNC = 0x1,
CATCH = 0x2,
INVALID = 0x4
} Kind;
};
_Unwind_Reason_Code ProcessDescriptors(
_Unwind_State state,
_Unwind_Control_Block* ucbp,
struct _Unwind_Context* context,
Descriptor::Format format,
const char* descriptorStart,
uint32_t flags) {
// EHT is inlined in the index using compact form. No descriptors. #5
if (flags & 0x1)
return _URC_CONTINUE_UNWIND;
// TODO: We should check the state here, and determine whether we need to
// perform phase1 or phase2 unwinding.
(void)state;
const char* descriptor = descriptorStart;
uint32_t descriptorWord;
getNextWord(descriptor, &descriptorWord);
while (descriptorWord) {
// Read descriptor based on # 9.2.
uint32_t length;
uint32_t offset;
switch (format) {
case Descriptor::LU32:
descriptor = getNextWord(descriptor, &length);
descriptor = getNextWord(descriptor, &offset);
+ break;
case Descriptor::LU16:
descriptor = getNextNibble(descriptor, &length);
descriptor = getNextNibble(descriptor, &offset);
+ break;
default:
assert(false);
return _URC_FAILURE;
}
// See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value.
Descriptor::Kind kind =
static_cast<Descriptor::Kind>((length & 0x1) | ((offset & 0x1) << 1));
// Clear off flag from last bit.
length &= ~1u;
offset &= ~1u;
uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset;
uintptr_t scopeEnd = scopeStart + length;
uintptr_t pc = _Unwind_GetIP(context);
bool isInScope = (scopeStart <= pc) && (pc < scopeEnd);
switch (kind) {
case Descriptor::CLEANUP: {
// TODO(ajwong): Handle cleanup descriptors.
break;
}
case Descriptor::FUNC: {
// TODO(ajwong): Handle function descriptors.
break;
}
case Descriptor::CATCH: {
// Catch descriptors require gobbling one more word.
uint32_t landing_pad;
descriptor = getNextWord(descriptor, &landing_pad);
if (isInScope) {
// TODO(ajwong): This is only phase1 compatible logic. Implement
// phase2.
landing_pad = signExtendPrel31(landing_pad & ~0x80000000);
if (landing_pad == 0xffffffff) {
return _URC_HANDLER_FOUND;
} else if (landing_pad == 0xfffffffe) {
return _URC_FAILURE;
} else {
/*
bool is_reference_type = landing_pad & 0x80000000;
void* matched_object;
if (__cxxabiv1::__cxa_type_match(
ucbp, reinterpret_cast<const std::type_info *>(landing_pad),
is_reference_type,
&matched_object) != __cxxabiv1::ctm_failed)
return _URC_HANDLER_FOUND;
*/
_LIBUNWIND_ABORT("Type matching not implemented");
}
}
break;
}
default:
_LIBUNWIND_ABORT("Invalid descriptor kind found.");
}
getNextWord(descriptor, &descriptorWord);
}
return _URC_CONTINUE_UNWIND;
}
static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state,
_Unwind_Control_Block* ucbp,
struct _Unwind_Context* context) {
// Read the compact model EHT entry's header # 6.3
const uint32_t* unwindingData = ucbp->pr_cache.ehtp;
assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry");
Descriptor::Format format =
static_cast<Descriptor::Format>((*unwindingData & 0x0f000000) >> 24);
const char *lsda =
reinterpret_cast<const char *>(_Unwind_GetLanguageSpecificData(context));
// Handle descriptors before unwinding so they are processed in the context
// of the correct stack frame.
_Unwind_Reason_Code result =
ProcessDescriptors(state, ucbp, context, format, lsda,
ucbp->pr_cache.additional);
if (result != _URC_CONTINUE_UNWIND)
return result;
if (__unw_step(reinterpret_cast<unw_cursor_t *>(context)) != UNW_STEP_SUCCESS)
return _URC_FAILURE;
return _URC_CONTINUE_UNWIND;
}
// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE /
// _UVRSD_UINT32.
uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) {
return ((1U << (count_minus_one + 1)) - 1) << start;
}
// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP /
// _UVRSD_DOUBLE.
uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) {
return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1);
}
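// Worked examples of the two discriminator encodings (illustrative only):
//
//   RegisterMask(4, 3)  == 0x00f0       // bit mask covering r4-r7
//   RegisterMask(0, 0)  == 0x0001       // just r0
//   RegisterRange(8, 3) == 0x00080004   // start register 8, count 4 (d8-d11)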
} // end anonymous namespace
/**
* Decodes an EHT entry.
*
* @param data Pointer to EHT.
* @param[out] off Offset from return value (in bytes) to begin interpretation.
* @param[out] len Number of bytes in unwind code.
* @return Pointer to beginning of unwind code.
*/
extern "C" const uint32_t*
decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) {
if ((*data & 0x80000000) == 0) {
// 6.2: Generic Model
//
// EHT entry is a prel31 pointing to the PR, followed by data understood
// only by the personality routine. Fortunately, all existing assembler
// implementations, including GNU assembler, LLVM integrated assembler,
// and ARM assembler, assume that the unwind opcodes come after the
// personality routine address.
*off = 1; // First byte is size data.
*len = (((data[1] >> 24) & 0xff) + 1) * 4;
data++; // Skip the first word, which is the prel31 offset.
} else {
// 6.3: ARM Compact Model
//
// EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indexed
// by format:
Descriptor::Format format =
static_cast<Descriptor::Format>((*data & 0x0f000000) >> 24);
switch (format) {
case Descriptor::SU16:
*len = 4;
*off = 1;
break;
case Descriptor::LU16:
case Descriptor::LU32:
*len = 4 + 4 * ((*data & 0x00ff0000) >> 16);
*off = 2;
break;
default:
return nullptr;
}
}
return data;
}
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data,
size_t offset, size_t len) {
bool wrotePC = false;
bool finish = false;
while (offset < len && !finish) {
uint8_t byte = getByte(data, offset++);
if ((byte & 0x80) == 0) {
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
if (byte & 0x40)
sp -= (((uint32_t)byte & 0x3f) << 2) + 4;
else
sp += ((uint32_t)byte << 2) + 4;
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
} else {
switch (byte & 0xf0) {
case 0x80: {
if (offset >= len)
return _URC_FAILURE;
uint32_t registers =
(((uint32_t)byte & 0x0f) << 12) |
(((uint32_t)getByte(data, offset++)) << 4);
if (!registers)
return _URC_FAILURE;
if (registers & (1 << 15))
wrotePC = true;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0x90: {
uint8_t reg = byte & 0x0f;
if (reg == 13 || reg == 15)
return _URC_FAILURE;
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg,
_UVRSD_UINT32, &sp);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
break;
}
case 0xa0: {
uint32_t registers = RegisterMask(4, byte & 0x07);
if (byte & 0x08)
registers |= 1 << 14;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0xb0: {
switch (byte) {
case 0xb0:
finish = true;
break;
case 0xb1: {
if (offset >= len)
return _URC_FAILURE;
uint8_t registers = getByte(data, offset++);
if (registers & 0xf0 || !registers)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32);
break;
}
case 0xb2: {
uint32_t addend = 0;
uint32_t shift = 0;
// This decodes a uleb128 value.
while (true) {
if (offset >= len)
return _URC_FAILURE;
uint32_t v = getByte(data, offset++);
addend |= (v & 0x7f) << shift;
if ((v & 0x80) == 0)
break;
shift += 7;
}
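// For reference, ULEB128 stores 7 payload bits per byte, least significant
// group first, with the top bit as a continuation flag; e.g. the bytes
// 0xe5 0x8e 0x26 decode to 0x98765 (624485).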
uint32_t sp;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
sp += 0x204 + (addend << 2);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
break;
}
case 0xb3: {
uint8_t v = getByte(data, offset++);
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(static_cast<uint8_t>(v >> 4),
v & 0x0f), _UVRSD_VFPX);
break;
}
case 0xb4:
case 0xb5:
case 0xb6:
case 0xb7:
return _URC_FAILURE;
default:
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(8, byte & 0x07), _UVRSD_VFPX);
break;
}
break;
}
case 0xc0: {
switch (byte) {
#if defined(__ARM_WMMX)
case 0xc0:
case 0xc1:
case 0xc2:
case 0xc3:
case 0xc4:
case 0xc5:
_Unwind_VRS_Pop(context, _UVRSC_WMMXD,
RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE);
break;
case 0xc6: {
uint8_t v = getByte(data, offset++);
uint8_t start = static_cast<uint8_t>(v >> 4);
uint8_t count_minus_one = v & 0xf;
if (start + count_minus_one >= 16)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_WMMXD,
RegisterRange(start, count_minus_one),
_UVRSD_DOUBLE);
break;
}
case 0xc7: {
uint8_t v = getByte(data, offset++);
if (!v || v & 0xf0)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE);
break;
}
#endif
case 0xc8:
case 0xc9: {
uint8_t v = getByte(data, offset++);
uint8_t start =
static_cast<uint8_t>(((byte == 0xc8) ? 16 : 0) + (v >> 4));
uint8_t count_minus_one = v & 0xf;
if (start + count_minus_one >= 32)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_VFP,
RegisterRange(start, count_minus_one),
_UVRSD_DOUBLE);
break;
}
default:
return _URC_FAILURE;
}
break;
}
case 0xd0: {
if (byte & 0x08)
return _URC_FAILURE;
_Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7),
_UVRSD_DOUBLE);
break;
}
default:
return _URC_FAILURE;
}
}
}
if (!wrotePC) {
uint32_t lr;
_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr);
_Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr);
}
return _URC_CONTINUE_UNWIND;
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr0(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr1(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__aeabi_unwind_cpp_pr2(_Unwind_State state, _Unwind_Control_Block *ucbp,
_Unwind_Context *context) {
return unwindOneFrame(state, ucbp, context);
}
static _Unwind_Reason_Code
unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
// EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during
// phase 1 and then restoring it to the "primary VRS" for phase 2. The
// effect is phase 2 doesn't see any of the VRS manipulations from phase 1.
// In this implementation, the phases don't share the VRS backing store.
// Instead, they are passed the original |uc| and they create a new VRS
// from scratch thus achieving the same effect.
__unw_init_local(cursor, uc);
// Walk each frame looking for a place to stop.
for (bool handlerNotFound = true; handlerNotFound;) {
// See if frame has code to run (has personality routine).
unw_proc_info_t frameInfo;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE1_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE1_ERROR;
}
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
const char *functionName = functionBuf;
unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
(frameInfo.start_ip + offset > frameInfo.end_ip))
functionName = ".anonymous.";
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR,
static_cast<void *>(exception_object), pc,
frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
// If there is a personality routine, ask it if it will want to stop at
// this frame.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(long)(frameInfo.handler);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): calling personality function %p",
static_cast<void *>(exception_object),
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(p)));
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
exception_object->pr_cache.fnstart = frameInfo.start_ip;
exception_object->pr_cache.ehtp =
(_Unwind_EHT_Header *)frameInfo.unwind_info;
exception_object->pr_cache.additional = frameInfo.flags;
_Unwind_Reason_Code personalityResult =
(*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context);
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p "
"additional %x",
static_cast<void *>(exception_object), personalityResult,
exception_object->pr_cache.fnstart,
static_cast<void *>(exception_object->pr_cache.ehtp),
exception_object->pr_cache.additional);
switch (personalityResult) {
case _URC_HANDLER_FOUND:
// found a catch clause or locals that need destructing in this frame
// stop search and remember stack pointer at the frame
handlerNotFound = false;
// p should have initialized barrier_cache. EHABI #7.3.5
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND",
static_cast<void *>(exception_object));
return _URC_NO_REASON;
case _URC_CONTINUE_UNWIND:
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND",
static_cast<void *>(exception_object));
// continue unwinding
break;
// EHABI #7.3.3
case _URC_FAILURE:
return _URC_FAILURE;
default:
// something went wrong
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE1_ERROR;
}
}
}
return _URC_NO_REASON;
}
static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor,
_Unwind_Exception *exception_object,
bool resume) {
// See comment at the start of unwind_phase1 regarding VRS integrity.
__unw_init_local(cursor, uc);
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
static_cast<void *>(exception_object));
int frame_count = 0;
// Walk each frame until we reach where search phase said to stop.
while (true) {
// Ask libunwind to get next frame (skip over first which is
// _Unwind_RaiseException or _Unwind_Resume).
//
// Resume only ever makes sense for 1 frame.
_Unwind_State state =
resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING;
if (resume && frame_count == 1) {
// On a resume, first unwind the _Unwind_Resume() frame. The next frame
// is now the landing pad for the cleanup from a previous execution of
// phase2. To continue unwinding correctly, replace VRS[15] with the
// IP of the frame that the previous run of phase2 installed the context
// for. After this, continue unwinding as if normal.
//
// See #7.4.6 for details.
__unw_set_reg(cursor, UNW_REG_IP,
exception_object->unwinder_cache.reserved2);
resume = false;
}
// Get info about this frame.
unw_word_t sp;
unw_proc_info_t frameInfo;
__unw_get_reg(cursor, UNW_REG_SP, &sp);
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): __unw_get_proc_info "
"failed => _URC_FATAL_PHASE2_ERROR",
static_cast<void *>(exception_object));
return _URC_FATAL_PHASE2_ERROR;
}
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
const char *functionName = functionBuf;
unw_word_t offset;
if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
&offset) != UNW_ESUCCESS) ||
(frameInfo.start_ip + offset > frameInfo.end_ip))
functionName = ".anonymous.";
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR ", func=%s, sp=0x%" PRIxPTR ", "
"lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "",
static_cast<void *>(exception_object), frameInfo.start_ip,
functionName, sp, frameInfo.lsda,
frameInfo.handler);
}
// If there is a personality routine, tell it we are unwinding.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
(_Unwind_Personality_Fn)(long)(frameInfo.handler);
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
// EHABI #7.2
exception_object->pr_cache.fnstart = frameInfo.start_ip;
exception_object->pr_cache.ehtp =
(_Unwind_EHT_Header *)frameInfo.unwind_info;
exception_object->pr_cache.additional = frameInfo.flags;
_Unwind_Reason_Code personalityResult =
(*p)(state, exception_object, context);
switch (personalityResult) {
case _URC_CONTINUE_UNWIND:
// Continue unwinding
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND",
static_cast<void *>(exception_object));
// EHABI #7.2
if (sp == exception_object->barrier_cache.sp) {
// Phase 1 said we would stop at this frame, but we did not...
_LIBUNWIND_ABORT("during phase1 personality function said it would "
"stop here, but now in phase2 it did not stop here");
}
break;
case _URC_INSTALL_CONTEXT:
_LIBUNWIND_TRACE_UNWINDING(
"unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT",
static_cast<void *>(exception_object));
// Personality routine says to transfer control to landing pad.
// We may get control back if landing pad calls _Unwind_Resume().
if (_LIBUNWIND_TRACING_UNWINDING) {
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
__unw_get_reg(cursor, UNW_REG_SP, &sp);
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering "
"user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR,
static_cast<void *>(exception_object),
pc, sp);
}
{
// EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume
// is called back, to find this same frame.
unw_word_t pc;
__unw_get_reg(cursor, UNW_REG_IP, &pc);
exception_object->unwinder_cache.reserved2 = (uint32_t)pc;
}
__unw_resume(cursor);
// __unw_resume() only returns if there was an error.
return _URC_FATAL_PHASE2_ERROR;
// # EHABI #7.4.3
case _URC_FAILURE:
abort();
default:
// Personality routine returned an unknown result code.
_LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d",
personalityResult);
return _URC_FATAL_PHASE2_ERROR;
}
}
frame_count++;
}
// Clean up phase did not resume at the frame that the search phase
// said it would...
return _URC_FATAL_PHASE2_ERROR;
}
/// Called by __cxa_throw. Only returns if there is a fatal error.
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_RaiseException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)",
static_cast<void *>(exception_object));
unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
// This field is for compatibility with GCC to say this isn't a forced
// unwind. EHABI #7.2
exception_object->unwinder_cache.reserved1 = 0;
// phase 1: the search phase
_Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object);
if (phase1 != _URC_NO_REASON)
return phase1;
// phase 2: the clean up phase
return unwind_phase2(&uc, &cursor, exception_object, false);
}
_LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) {
// This is to be called when exception handling completes to give us a chance
// to perform any housekeeping. EHABI #7.2. But we have nothing to do here.
(void)exception_object;
}
/// When _Unwind_RaiseException() is in phase2, it hands control
/// to the personality function at each frame. The personality
/// may force a jump to a landing pad in that function; the landing
/// pad code may then call _Unwind_Resume() to continue with the
/// unwinding. Note: the call to _Unwind_Resume() is from compiler
/// generated user code. All other _Unwind_* routines are called
/// by the C++ runtime __cxa_* routines.
///
/// Note: re-throwing an exception (as opposed to continuing the unwind)
/// is implemented by having the code call __cxa_rethrow() which
/// in turn calls _Unwind_Resume_or_Rethrow().
_LIBUNWIND_EXPORT void
_Unwind_Resume(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)",
static_cast<void *>(exception_object));
unw_context_t uc;
unw_cursor_t cursor;
__unw_getcontext(&uc);
// _Unwind_RaiseException on EHABI will always set the reserved1 field to 0,
// which is in the same position as private_1 below.
// TODO(ajwong): Who wrote the above? Why is it true?
unwind_phase2(&uc, &cursor, exception_object, true);
// Clients assume _Unwind_Resume() does not return, so all we can do is abort.
_LIBUNWIND_ABORT("_Unwind_Resume() can't return");
}
/// Called by personality handler during phase 2 to get LSDA for current frame.
_LIBUNWIND_EXPORT uintptr_t
_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
unw_proc_info_t frameInfo;
uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
result = (uintptr_t)frameInfo.lsda;
_LIBUNWIND_TRACE_API(
"_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx",
static_cast<void *>(context), (long long)result);
return result;
}
static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation,
void* valuep) {
uint64_t value = 0;
switch (representation) {
case _UVRSD_UINT32:
case _UVRSD_FLOAT:
memcpy(&value, valuep, sizeof(uint32_t));
break;
case _UVRSD_VFPX:
case _UVRSD_UINT64:
case _UVRSD_DOUBLE:
memcpy(&value, valuep, sizeof(uint64_t));
break;
}
return value;
}
_LIBUNWIND_EXPORT _Unwind_VRS_Result
_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t regno, _Unwind_VRS_DataRepresentation representation,
void *valuep) {
_LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX)",
static_cast<void *>(context), regclass, regno,
representation,
ValueAsBitPattern(representation, valuep));
unw_cursor_t *cursor = (unw_cursor_t *)context;
switch (regclass) {
case _UVRSC_CORE:
if (representation != _UVRSD_UINT32 || regno > 15)
return _UVRSR_FAILED;
return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
*(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_VFP:
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
if (representation == _UVRSD_VFPX) {
// Can only touch d0-15 with FSTMFDX.
if (regno > 15)
return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
} else {
if (regno > 31)
return _UVRSR_FAILED;
}
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
return _UVRSR_FAILED;
return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno),
*(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_WMMXD:
if (representation != _UVRSD_DOUBLE || regno > 31)
return _UVRSR_FAILED;
return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
*(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
break;
#endif
}
_LIBUNWIND_ABORT("unsupported register class");
}
static _Unwind_VRS_Result
_Unwind_VRS_Get_Internal(_Unwind_Context *context,
_Unwind_VRS_RegClass regclass, uint32_t regno,
_Unwind_VRS_DataRepresentation representation,
void *valuep) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
switch (regclass) {
case _UVRSC_CORE:
if (representation != _UVRSD_UINT32 || regno > 15)
return _UVRSR_FAILED;
return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno),
(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_VFP:
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
if (representation == _UVRSD_VFPX) {
// Can only touch d0-15 with FSTMFDX.
if (regno > 15)
return _UVRSR_FAILED;
__unw_save_vfp_as_X(cursor);
} else {
if (regno > 31)
return _UVRSR_FAILED;
}
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#if defined(__ARM_WMMX)
case _UVRSC_WMMXC:
if (representation != _UVRSD_UINT32 || regno > 3)
return _UVRSR_FAILED;
return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno),
(unw_word_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
case _UVRSC_WMMXD:
if (representation != _UVRSD_DOUBLE || regno > 31)
return _UVRSR_FAILED;
return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno),
(unw_fpreg_t *)valuep) == UNW_ESUCCESS
? _UVRSR_OK
: _UVRSR_FAILED;
#else
case _UVRSC_WMMXC:
case _UVRSC_WMMXD:
break;
#endif
}
_LIBUNWIND_ABORT("unsupported register class");
}
_LIBUNWIND_EXPORT _Unwind_VRS_Result
_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t regno, _Unwind_VRS_DataRepresentation representation,
void *valuep) {
_Unwind_VRS_Result result =
_Unwind_VRS_Get_Internal(context, regclass, regno, representation,
valuep);
_LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, "
"rep=%d, value=0x%llX, result = %d)",
static_cast<void *>(context), regclass, regno,
representation,
ValueAsBitPattern(representation, valuep), result);
return result;
}
_Unwind_VRS_Result
_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
uint32_t discriminator,
_Unwind_VRS_DataRepresentation representation) {
_LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, "
"discriminator=%d, representation=%d)",
static_cast<void *>(context), regclass, discriminator,
representation);
switch (regclass) {
case _UVRSC_WMMXC:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_CORE: {
if (representation != _UVRSD_UINT32)
return _UVRSR_FAILED;
// When popping SP from the stack, we don't want to override it from the
// computed new stack location. See EHABI #7.5.4 table 3.
bool poppedSP = false;
uint32_t* sp;
if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
for (uint32_t i = 0; i < 16; ++i) {
if (!(discriminator & static_cast<uint32_t>(1 << i)))
continue;
uint32_t value = *sp++;
if (regclass == _UVRSC_CORE && i == 13)
poppedSP = true;
if (_Unwind_VRS_Set(context, regclass, i,
_UVRSD_UINT32, &value) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
}
if (!poppedSP) {
return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp);
}
return _UVRSR_OK;
}
case _UVRSC_WMMXD:
#if !defined(__ARM_WMMX)
break;
#endif
case _UVRSC_VFP: {
if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
return _UVRSR_FAILED;
uint32_t first = discriminator >> 16;
uint32_t count = discriminator & 0xffff;
uint32_t end = first+count;
uint32_t* sp;
if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP,
_UVRSD_UINT32, &sp) != _UVRSR_OK) {
return _UVRSR_FAILED;
}
// For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard
// format 1", which is equivalent to FSTMD + a padding word.
for (uint32_t i = first; i < end; ++i) {
// SP is only 32-bit aligned so don't copy 64-bit at a time.
uint64_t w0 = *sp++;
uint64_t w1 = *sp++;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint64_t value = (w1 << 32) | w0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
uint64_t value = (w0 << 32) | w1;
#else
#error "Unable to determine endianess"
#endif
if (_Unwind_VRS_Set(context, regclass, i, representation, &value) !=
_UVRSR_OK)
return _UVRSR_FAILED;
}
if (representation == _UVRSD_VFPX)
++sp;
return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32,
&sp);
}
}
_LIBUNWIND_ABORT("unsupported register class");
}
/// Called by personality handler during phase 2 to find the start of the
/// function.
_LIBUNWIND_EXPORT uintptr_t
_Unwind_GetRegionStart(struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
unw_proc_info_t frameInfo;
uintptr_t result = 0;
if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS)
result = (uintptr_t)frameInfo.start_ip;
_LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX",
static_cast<void *>(context), (long long)result);
return result;
}
/// Called by personality handler during phase 2 if a foreign exception
/// is caught.
_LIBUNWIND_EXPORT void
_Unwind_DeleteException(_Unwind_Exception *exception_object) {
_LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)",
static_cast<void *>(exception_object));
if (exception_object->exception_cleanup != NULL)
(*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT,
exception_object);
}
extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__gnu_unwind_frame(_Unwind_Exception *exception_object,
struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
if (__unw_step(cursor) != UNW_STEP_SUCCESS)
return _URC_FAILURE;
return _URC_OK;
}
#endif // defined(_LIBUNWIND_ARM_EHABI)
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
index ca276d2f3cf8..81500905c0f5 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -1,1327 +1,1327 @@
//===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Implements a lazy call graph analysis and related passes for the new pass
/// manager.
///
/// NB: This is *not* a traditional call graph! It is a graph which models both
/// the current calls and potential calls. As a consequence there are many
/// edges in this call graph that do not correspond to a 'call' or 'invoke'
/// instruction.
///
/// The primary use case of this graph analysis is to facilitate iterating
/// across the functions of a module in ways that ensure all callees are
/// visited prior to a caller (given any SCC constraints), or vice versa. As
/// such it is particularly well suited to organizing CGSCC optimizations such
/// as inlining, outlining, argument promotion, etc. That is its primary use
/// case and motivates the design. It may not be appropriate for other
/// purposes. The use graph of functions or some other conservative analysis of
/// call instructions may be interesting for optimizations and subsequent
/// analyses which don't work in the context of an overly specified
/// potential-call-edge graph.
///
/// To understand the specific rules and nature of this call graph analysis,
/// see the documentation of the \c LazyCallGraph below.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LAZYCALLGRAPH_H
#define LLVM_ANALYSIS_LAZYCALLGRAPH_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
namespace llvm {
template <class GraphType> struct GraphTraits;
class Module;
class Value;
/// A lazily constructed view of the call graph of a module.
///
/// With the edges of this graph, the motivating constraint that we are
/// attempting to maintain is that function-local optimization, CGSCC-local
/// optimizations, and optimizations transforming a pair of functions connected
/// by an edge in the graph, do not invalidate a bottom-up traversal of the SCC
/// DAG. That is, no optimizations will delete, remove, or add an edge such
/// that functions already visited in a bottom-up order of the SCC DAG are no
/// longer valid to have visited, or such that functions not yet visited in
/// a bottom-up order of the SCC DAG are not required to have already been
/// visited.
///
/// Within this constraint, the desire is to minimize the merge points of the
/// SCC DAG. The greater the fanout of the SCC DAG and the fewer merge points
/// in the SCC DAG, the more independence there is in optimizing within it.
/// There is a strong desire to enable parallelization of optimizations over
/// the call graph, and both limited fanout and merge points will (artificially
/// in some cases) limit the scaling of such an effort.
///
/// To this end, the graph represents both direct calls and any potential resolution of
/// an indirect call edge. Another way to think about it is that it represents
/// both the direct call edges and any direct call edges that might be formed
/// through static optimizations. Specifically, it considers taking the address
/// of a function to be an edge in the call graph because this might be
/// forwarded to become a direct call by some subsequent function-local
/// optimization. The result is that the graph closely follows the use-def
/// edges for functions. Walking "up" the graph can be done by looking at all
/// of the uses of a function.
///
/// The roots of the call graph are the external functions and functions
/// escaped into global variables. Those functions can be called from outside
/// of the module or via unknowable means in the IR -- we may not be able to
/// form even a potential call edge from a function body which may dynamically
/// load the function and call it.
///
/// This analysis still requires updates to remain valid after optimizations
/// which could potentially change the set of potential callees. The
/// constraints it operates under only make the traversal order remain valid.
///
/// The entire analysis must be re-computed if full interprocedural
/// optimizations run at any point. For example, globalopt completely
/// invalidates the information in this analysis.
///
/// FIXME: This class is named LazyCallGraph in a lame attempt to distinguish
/// it from the existing CallGraph. At some point, it is expected that this
/// will be the only call graph and it will be renamed accordingly.
class LazyCallGraph {
public:
class Node;
class EdgeSequence;
class SCC;
class RefSCC;
/// A class used to represent edges in the call graph.
///
/// The lazy call graph models both *call* edges and *reference* edges. Call
/// edges are much what you would expect, and exist when there is a 'call' or
/// 'invoke' instruction calling some function. Reference edges are also tracked
/// alongside these, and exist whenever any instruction (transitively
/// through its operands) references a function. All call edges are
/// inherently reference edges, and so the reference graph forms a superset
/// of the formal call graph.
///
/// All of these forms of edges are fundamentally represented as outgoing
/// edges. The edges are stored in the source node and point at the target
/// node. This allows the edge structure itself to be a very compact data
/// structure: essentially a tagged pointer.
class Edge {
public:
/// The kind of edge in the graph.
enum Kind : bool { Ref = false, Call = true };
Edge();
explicit Edge(Node &N, Kind K);
/// Test whether the edge is null.
///
/// This happens when an edge has been deleted. We leave the edge objects
/// around but clear them.
explicit operator bool() const;
/// Returns the \c Kind of the edge.
Kind getKind() const;
/// Test whether the edge represents a direct call to a function.
///
/// This requires that the edge is not null.
bool isCall() const;
/// Get the call graph node referenced by this edge.
///
/// This requires that the edge is not null.
Node &getNode() const;
/// Get the function referenced by this edge.
///
/// This requires that the edge is not null.
Function &getFunction() const;
private:
friend class LazyCallGraph::EdgeSequence;
friend class LazyCallGraph::RefSCC;
PointerIntPair<Node *, 1, Kind> Value;
void setKind(Kind K) { Value.setInt(K); }
};
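// --- Editorial sketch: illustrative client code, not part of this header ---
// A minimal example of the Edge accessors declared above, assuming the edge
// was obtained from an already populated node. The helper name `printEdge`
// is hypothetical.
static void printEdge(const LazyCallGraph::Edge &E, raw_ostream &OS) {
  if (!E) {
    // Deleted edges remain in the edge vector but test false.
    OS << "<null edge>\n";
    return;
  }
  OS << (E.isCall() ? "call" : "ref") << " -> " << E.getFunction().getName()
     << "\n";
}
// ----------------------------------------------------------------------------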
/// The edge sequence object.
///
/// This typically exists entirely within the node but is exposed as
/// a separate type because a node doesn't initially have edges. An explicit
/// population step is required to produce this sequence at first and it is
/// then cached in the node. It is also used to represent edges entering the
/// graph from outside the module to model the graph's roots.
///
/// The sequence itself is both iterable and indexable. The indexes remain
/// stable even as the sequence mutates (including removal).
class EdgeSequence {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
friend class LazyCallGraph::RefSCC;
using VectorT = SmallVector<Edge, 4>;
using VectorImplT = SmallVectorImpl<Edge>;
public:
/// An iterator used for the edges to both entry nodes and child nodes.
class iterator
: public iterator_adaptor_base<iterator, VectorImplT::iterator,
std::forward_iterator_tag> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
VectorImplT::iterator E;
// Build the iterator for a specific position in the edge list.
iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
: iterator_adaptor_base(BaseI), E(E) {
while (I != E && !*I)
++I;
}
public:
iterator() = default;
using iterator_adaptor_base::operator++;
iterator &operator++() {
do {
++I;
} while (I != E && !*I);
return *this;
}
};
/// An iterator over specifically call edges.
///
/// This has the same iteration properties as the \c iterator, but
/// restricts itself to edges which represent actual calls.
class call_iterator
: public iterator_adaptor_base<call_iterator, VectorImplT::iterator,
std::forward_iterator_tag> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
VectorImplT::iterator E;
/// Advance the iterator to the next valid, call edge.
void advanceToNextEdge() {
while (I != E && (!*I || !I->isCall()))
++I;
}
// Build the iterator for a specific position in the edge list.
call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
: iterator_adaptor_base(BaseI), E(E) {
advanceToNextEdge();
}
public:
call_iterator() = default;
using iterator_adaptor_base::operator++;
call_iterator &operator++() {
++I;
advanceToNextEdge();
return *this;
}
};
iterator begin() { return iterator(Edges.begin(), Edges.end()); }
iterator end() { return iterator(Edges.end(), Edges.end()); }
Edge &operator[](Node &N) {
assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
auto &E = Edges[EdgeIndexMap.find(&N)->second];
assert(E && "Dead or null edge!");
return E;
}
Edge *lookup(Node &N) {
auto EI = EdgeIndexMap.find(&N);
if (EI == EdgeIndexMap.end())
return nullptr;
auto &E = Edges[EI->second];
return E ? &E : nullptr;
}
call_iterator call_begin() {
return call_iterator(Edges.begin(), Edges.end());
}
call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); }
iterator_range<call_iterator> calls() {
return make_range(call_begin(), call_end());
}
bool empty() {
for (auto &E : Edges)
if (E)
return false;
return true;
}
private:
VectorT Edges;
DenseMap<Node *, int> EdgeIndexMap;
EdgeSequence() = default;
/// Internal helper to insert an edge to a node.
void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
/// Internal helper to change an edge kind.
void setEdgeKind(Node &ChildN, Edge::Kind EK);
/// Internal helper to remove the edge to the given function.
bool removeEdgeInternal(Node &ChildN);
};
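// --- Editorial sketch: illustrative client code, not part of this header ---
// Iterating an EdgeSequence as documented above: the plain iterators skip
// null (deleted) edges, and calls() restricts the walk to call edges. The
// helper name `countDirectCalls` is hypothetical.
static unsigned countDirectCalls(LazyCallGraph::Node &N) {
  unsigned NumCalls = 0;
  for (LazyCallGraph::Edge &E : N.populate().calls()) {
    (void)E; // Each surviving call edge has a concrete callee node.
    ++NumCalls;
  }
  return NumCalls;
}
// ----------------------------------------------------------------------------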
/// A node in the call graph.
///
/// This represents a single node. Its primary roles are to cache the list of
/// callees, de-duplicate and provide fast testing of whether a function is
/// a callee, and facilitate iteration of child nodes in the graph.
///
/// The node works much like an optional in order to lazily populate the
/// edges of each node. Until populated, there are no edges. Once populated,
/// you can access the edges by dereferencing the node or using the `->`
/// operator as if the node was an `Optional<EdgeSequence>`.
class Node {
friend class LazyCallGraph;
friend class LazyCallGraph::RefSCC;
public:
LazyCallGraph &getGraph() const { return *G; }
Function &getFunction() const { return *F; }
StringRef getName() const { return F->getName(); }
/// Equality is defined as address equality.
bool operator==(const Node &N) const { return this == &N; }
bool operator!=(const Node &N) const { return !operator==(N); }
/// Tests whether the node has been populated with edges.
bool isPopulated() const { return Edges.hasValue(); }
/// Tests whether this is actually a dead node and no longer valid.
///
/// Users rarely interact with nodes in this state and other methods are
/// invalid. This is used to model a node in an edge list where the
/// function has been completely removed.
bool isDead() const {
assert(!G == !F &&
"Both graph and function pointers should be null or non-null.");
return !G;
}
// We allow accessing the edges by dereferencing or using the arrow
// operator, essentially wrapping the internal optional.
EdgeSequence &operator*() const {
// Rip const off because the node itself isn't changing here.
return const_cast<EdgeSequence &>(*Edges);
}
EdgeSequence *operator->() const { return &**this; }
/// Populate the edges of this node if necessary.
///
/// The first time this is called it will populate the edges for this node
/// in the graph. It does this by scanning the underlying function, so once
/// this is done, any changes to that function must be explicitly reflected
/// in updates to the graph.
///
/// \returns the populated \c EdgeSequence to simplify walking it.
///
/// This will not update or re-scan anything if called repeatedly. Instead,
/// the edge sequence is cached and returned immediately on subsequent
/// calls.
EdgeSequence &populate() {
if (Edges)
return *Edges;
return populateSlow();
}
private:
LazyCallGraph *G;
Function *F;
// We provide for the DFS numbering and Tarjan walk lowlink numbers to be
// stored directly within the node. These are both '-1' when nodes are part
// of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
int DFSNumber = 0;
int LowLink = 0;
Optional<EdgeSequence> Edges;
/// Basic constructor implements the scanning of F into Edges and
/// EdgeIndexMap.
Node(LazyCallGraph &G, Function &F) : G(&G), F(&F) {}
/// Implementation of the scan when populating.
EdgeSequence &populateSlow();
/// Internal helper to directly replace the function with a new one.
///
/// This is used to facilitate transformations which need to replace the
/// formal Function object but directly move the body and users from one to
/// the other.
void replaceFunction(Function &NewF);
void clear() { Edges.reset(); }
/// Print the name of this node's function.
friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
return OS << N.F->getName();
}
/// Dump the name of this node's function to stderr.
void dump() const;
};
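// --- Editorial sketch: illustrative client code, not part of this header ---
// The lazy-population contract described above: a node has no edges until
// populate() is called, after which the cached EdgeSequence is returned on
// every subsequent call. The helper name `visitCallees` is hypothetical.
static void visitCallees(LazyCallGraph &CG, Function &F,
                         function_ref<void(Function &)> Visit) {
  LazyCallGraph::Node &N = CG.get(F);                // Creates the node if needed.
  LazyCallGraph::EdgeSequence &Edges = N.populate(); // Scans F at most once.
  for (LazyCallGraph::Edge &E : Edges.calls())
    Visit(E.getFunction());
}
// ----------------------------------------------------------------------------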
/// An SCC of the call graph.
///
/// This represents a Strongly Connected Component of the direct call graph
/// -- ignoring indirect calls and function references. It stores this as
/// a collection of call graph nodes. While the order of nodes in the SCC is
/// stable, it is not any particular order.
///
/// The SCCs are nested within a \c RefSCC, see below for details about that
/// outer structure. SCCs do not support mutation of the call graph; that
/// must be done through the containing \c RefSCC in order to fully reason
/// about the ordering and connections of the graph.
- class SCC {
+ class LLVM_EXTERNAL_VISIBILITY SCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
RefSCC *OuterRefSCC;
SmallVector<Node *, 1> Nodes;
template <typename NodeRangeT>
SCC(RefSCC &OuterRefSCC, NodeRangeT &&Nodes)
: OuterRefSCC(&OuterRefSCC), Nodes(std::forward<NodeRangeT>(Nodes)) {}
void clear() {
OuterRefSCC = nullptr;
Nodes.clear();
}
/// Print a short description useful for debugging or logging.
///
/// We print the function names in the SCC wrapped in '()'s and skipping
/// the middle functions if there are a large number.
//
// Note: this is defined inline to dodge issues with GCC's interpretation
// of enclosing namespaces for friend function declarations.
friend raw_ostream &operator<<(raw_ostream &OS, const SCC &C) {
OS << '(';
int i = 0;
for (LazyCallGraph::Node &N : C) {
if (i > 0)
OS << ", ";
// Elide the inner elements if there are too many.
if (i > 8) {
OS << "..., " << *C.Nodes.back();
break;
}
OS << N;
++i;
}
OS << ')';
return OS;
}
/// Dump a short description of this SCC to stderr.
void dump() const;
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the SCC.
///
/// This will attempt to validate all of the basic invariants within an
/// SCC, but not that it is a strongly connected component per se. Primarily
/// useful while building and updating the graph to check that basic
/// properties are in place rather than having inexplicable crashes later.
void verify();
#endif
public:
using iterator = pointee_iterator<SmallVectorImpl<Node *>::const_iterator>;
iterator begin() const { return Nodes.begin(); }
iterator end() const { return Nodes.end(); }
int size() const { return Nodes.size(); }
RefSCC &getOuterRefSCC() const { return *OuterRefSCC; }
/// Test if this SCC is a parent of \a C.
///
/// Note that this is linear in the number of edges departing the current
/// SCC.
bool isParentOf(const SCC &C) const;
/// Test if this SCC is an ancestor of \a C.
///
/// Note that in the worst case this is linear in the number of edges
/// departing the current SCC and every SCC in the entire graph reachable
/// from this SCC. Thus this very well may walk every edge in the entire
/// call graph! Do not call this in a tight loop!
bool isAncestorOf(const SCC &C) const;
/// Test if this SCC is a child of \a C.
///
/// See the comments for \c isParentOf for detailed notes about the
/// complexity of this routine.
bool isChildOf(const SCC &C) const { return C.isParentOf(*this); }
/// Test if this SCC is a descendant of \a C.
///
/// See the comments for \c isParentOf for detailed notes about the
/// complexity of this routine.
bool isDescendantOf(const SCC &C) const { return C.isAncestorOf(*this); }
/// Provide a short name by printing this SCC to a std::string.
///
/// This copes with the fact that we don't have a name per se for an SCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
raw_string_ostream OS(Name);
OS << *this;
OS.flush();
return Name;
}
};
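// --- Editorial sketch: illustrative client code, not part of this header ---
// SCCs are iterable ranges of nodes with a stable (but otherwise
// unspecified) order, and getName() produces the same "(f, g, ...)" text as
// operator<<. The helper name `dumpSCCMembers` is hypothetical.
static void dumpSCCMembers(LazyCallGraph::SCC &C, raw_ostream &OS) {
  OS << C.getName() << " contains " << C.size() << " function(s):\n";
  for (LazyCallGraph::Node &N : C)
    OS << "  " << N.getFunction().getName() << "\n";
}
// ----------------------------------------------------------------------------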
/// A RefSCC of the call graph.
///
/// This models a Strongly Connected Component of function reference edges in
/// the call graph. As opposed to actual SCCs, these can be used to scope
/// subgraphs of the module which are independent from other subgraphs of the
/// module because they do not reference it in any way. This is also the unit
/// where we do mutation of the graph in order to restrict mutations to those
/// which don't violate this independence.
///
/// A RefSCC contains a DAG of actual SCCs. All the nodes within the RefSCC
/// are necessarily within some actual SCC that nests within it. Since
/// a direct call *is* a reference, there will always be at least one RefSCC
/// around any SCC.
class RefSCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
LazyCallGraph *G;
/// A postorder list of the inner SCCs.
SmallVector<SCC *, 4> SCCs;
/// A map from SCC to index in the postorder list.
SmallDenseMap<SCC *, int, 4> SCCIndices;
/// Fast-path constructor. RefSCCs should instead be constructed by calling
/// formRefSCCFast on the graph itself.
RefSCC(LazyCallGraph &G);
void clear() {
SCCs.clear();
SCCIndices.clear();
}
/// Print a short description useful for debugging or logging.
///
/// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if
/// there are a large number.
//
// Note: this is defined inline to dodge issues with GCC's interpretation
// of enclosing namespaces for friend function declarations.
friend raw_ostream &operator<<(raw_ostream &OS, const RefSCC &RC) {
OS << '[';
int i = 0;
for (LazyCallGraph::SCC &C : RC) {
if (i > 0)
OS << ", ";
// Elide the inner elements if there are too many.
if (i > 4) {
OS << "..., " << *RC.SCCs.back();
break;
}
OS << C;
++i;
}
OS << ']';
return OS;
}
/// Dump a short description of this RefSCC to stderr.
void dump() const;
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
/// Verify invariants about the RefSCC and all its SCCs.
///
/// This will attempt to validate all of the invariants *within* the
/// RefSCC, but not that it is a strongly connected component of the larger
/// graph. This makes it useful even when partially through an update.
///
/// Invariants checked:
/// - SCCs and their indices match.
/// - The SCCs list is in fact in post-order.
void verify();
#endif
public:
using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>;
using range = iterator_range<iterator>;
using parent_iterator =
pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>;
iterator begin() const { return SCCs.begin(); }
iterator end() const { return SCCs.end(); }
ssize_t size() const { return SCCs.size(); }
SCC &operator[](int Idx) { return *SCCs[Idx]; }
iterator find(SCC &C) const {
return SCCs.begin() + SCCIndices.find(&C)->second;
}
/// Test if this RefSCC is a parent of \a RC.
///
/// CAUTION: This method walks every edge in the \c RefSCC, it can be very
/// expensive.
bool isParentOf(const RefSCC &RC) const;
/// Test if this RefSCC is an ancestor of \a RC.
///
/// CAUTION: This method walks the directed graph of edges as far as
/// necessary to find a possible path to the argument. In the worst case
/// this may walk the entire graph and can be extremely expensive.
bool isAncestorOf(const RefSCC &RC) const;
/// Test if this RefSCC is a child of \a RC.
///
/// CAUTION: This method walks every edge in the argument \c RefSCC, it can
/// be very expensive.
bool isChildOf(const RefSCC &RC) const { return RC.isParentOf(*this); }
/// Test if this RefSCC is a descendant of \a RC.
///
/// CAUTION: This method walks the directed graph of edges as far as
/// necessary to find a possible path from the argument. In the worst case
/// this may walk the entire graph and can be extremely expensive.
bool isDescendantOf(const RefSCC &RC) const {
return RC.isAncestorOf(*this);
}
/// Provide a short name by printing this RefSCC to a std::string.
///
/// This copes with the fact that we don't have a name per se for a RefSCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
raw_string_ostream OS(Name);
OS << *this;
OS.flush();
return Name;
}
///@{
/// \name Mutation API
///
/// These methods provide the core API for updating the call graph in the
/// presence of (potentially still in-flight) DFS-found RefSCCs and SCCs.
///
/// Note that these methods sometimes have complex runtimes, so be careful
/// how you call them.
/// Make an existing internal ref edge into a call edge.
///
/// This may form a larger cycle and thus collapse SCCs into TargetN's SCC.
/// If that happens, the optional callback \p MergeCB will be invoked (if
/// provided) on the SCCs being merged away prior to actually performing
/// the merge. Note that this will never include the target SCC as that
/// will be the SCC functions are merged into to resolve the cycle. Once
/// this function returns, these merged SCCs are not in a valid state but
/// the pointers will remain valid until destruction of the parent graph
/// instance for the purpose of clearing cached information. As a
/// convenience, this function also returns 'true' if a cycle was formed and
/// some SCCs were merged away.
///
/// After this operation, both SourceN's SCC and TargetN's SCC may move
/// position within this RefSCC's postorder list. Any SCCs merged are
/// merged into TargetN's SCC in order to preserve reachability analyses
/// which took place on that SCC.
bool switchInternalEdgeToCall(
Node &SourceN, Node &TargetN,
function_ref<void(ArrayRef<SCC *> MergedSCCs)> MergeCB = {});
/// Make an existing internal call edge between separate SCCs into a ref
/// edge.
///
/// If SourceN and TargetN are in separate SCCs within this RefSCC, changing
/// the call edge between them to a ref edge is a trivial operation that
/// does not require any structural changes to the call graph.
void switchTrivialInternalEdgeToRef(Node &SourceN, Node &TargetN);
/// Make an existing internal call edge within a single SCC into a ref
/// edge.
///
/// Since SourceN and TargetN are part of a single SCC, this SCC may be
/// split up due to breaking a cycle in the call edges that formed it. If
/// that happens, then this routine will insert new SCCs into the postorder
/// list *before* the SCC of TargetN (previously the SCC of both). This
/// preserves postorder as the TargetN can reach all of the other nodes by
/// definition of previously being in a single SCC formed by the cycle from
/// SourceN to TargetN.
///
/// The newly added SCCs are inserted *immediately* and contiguously
/// prior to the TargetN SCC, and the returned range covers the new SCCs in
/// the RefSCC's postorder sequence. You can directly iterate the returned
/// range to observe all of the new SCCs in postorder.
///
/// Note that if SourceN and TargetN are in separate SCCs, the simpler
/// routine `switchTrivialInternalEdgeToRef` should be used instead.
iterator_range<iterator> switchInternalEdgeToRef(Node &SourceN,
Node &TargetN);
/// Make an existing outgoing ref edge into a call edge.
///
/// Note that this is trivial as there are no cyclic impacts and there
/// remains a reference edge.
void switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN);
/// Make an existing outgoing call edge into a ref edge.
///
/// This is trivial as there are no cyclic impacts and there remains
/// a reference edge.
void switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN);
/// Insert a ref edge from one node in this RefSCC to another in this
/// RefSCC.
///
/// This is always a trivial operation as it doesn't change any part of the
/// graph structure besides connecting the two nodes.
///
/// Note that we don't support directly inserting internal *call* edges
/// because that could change the graph structure and requires returning
/// information about what became invalid. As a consequence, the pattern
/// should be to first insert the necessary ref edge, and then to switch it
/// to a call edge if needed and handle any invalidation that results. See
/// the \c switchInternalEdgeToCall routine for details.
void insertInternalRefEdge(Node &SourceN, Node &TargetN);
/// Insert an edge whose parent is in this RefSCC and child is in some
/// child RefSCC.
///
/// There must be an existing path from the \p SourceN to the \p TargetN.
/// This operation is inexpensive and does not change the set of SCCs and
/// RefSCCs in the graph.
void insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Insert an edge whose source is in a descendant RefSCC and target is in
/// this RefSCC.
///
/// There must be an existing path from the target to the source in this
/// case.
///
/// NB! This has the potential to be a very expensive function. It
/// inherently forms a cycle in the prior RefSCC DAG and we have to merge
/// RefSCCs to resolve that cycle. But finding all of the RefSCCs which
/// participate in the cycle can in the worst case require traversing every
/// RefSCC in the graph. Every attempt is made to avoid that, but passes
/// must still exercise caution calling this routine repeatedly.
///
/// Also note that this can only insert ref edges. In order to insert
/// a call edge, first insert a ref edge and then switch it to a call edge.
/// These are intentionally kept as separate interfaces because each step
/// of the operation invalidates a different set of data structures.
///
/// This returns all the RefSCCs which were merged into this RefSCC
/// (the target's). This allows callers to invalidate any cached
/// information.
///
/// FIXME: We could possibly optimize this quite a bit for cases where the
/// caller and callee are very nearby in the graph. See comments in the
/// implementation for details, but that use case might impact users.
SmallVector<RefSCC *, 1> insertIncomingRefEdge(Node &SourceN,
Node &TargetN);
/// Remove an edge whose source is in this RefSCC and target is *not*.
///
/// This removes an inter-RefSCC edge. All inter-RefSCC edges originating
/// from this SCC have been fully explored by any in-flight DFS graph
/// formation, so this is always safe to call once you have the source
/// RefSCC.
///
/// This operation does not change the cyclic structure of the graph and so
/// is very inexpensive. It may change the connectivity graph of the SCCs
/// though, so be careful calling this while iterating over them.
void removeOutgoingEdge(Node &SourceN, Node &TargetN);
/// Remove a list of ref edges which are entirely within this RefSCC.
///
/// Both the \a SourceN and all of the \a TargetNs must be within this
/// RefSCC. Removing these edges may break cycles that form this RefSCC and
/// thus this operation may change the RefSCC graph significantly. In
/// particular, this operation will re-form new RefSCCs based on the
/// remaining connectivity of the graph. The following invariants are
/// guaranteed to hold after calling this method:
///
/// 1) If a ref-cycle remains after removal, it leaves this RefSCC intact
/// and in the graph. No new RefSCCs are built.
/// 2) Otherwise, this RefSCC will be dead after this call and no longer in
/// the graph or the postorder traversal of the call graph. Any iterator
/// pointing at this RefSCC will become invalid.
/// 3) All newly formed RefSCCs will be returned and the order of the
/// RefSCCs returned will be a valid postorder traversal of the new
/// RefSCCs.
/// 4) No RefSCC other than this RefSCC has its member set changed (this is
/// inherent in the definition of removing such an edge).
///
/// These invariants are very important to ensure that we can build
/// optimization pipelines on top of the CGSCC pass manager which
/// intelligently update the RefSCC graph without invalidating other parts
/// of the RefSCC graph.
///
/// Note that we provide no routine to remove a *call* edge. Instead, you
/// must first switch it to a ref edge using \c switchInternalEdgeToRef.
/// This split API is intentional as each of these two steps can invalidate
/// a different aspect of the graph structure and needs to have the
/// invalidation handled independently.
///
/// The runtime complexity of this method is, in the worst case, O(V+E)
/// where V is the number of nodes in this RefSCC and E is the number of
/// edges leaving the nodes in this RefSCC. Note that E includes both edges
/// within this RefSCC and edges from this RefSCC to child RefSCCs. Some
/// effort has been made to minimize the overhead of common cases such as
/// self-edges and edge removals which result in a spanning tree with no
/// more cycles.
SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN,
ArrayRef<Node *> TargetNs);
/// A convenience wrapper around the above to handle trivial cases of
/// inserting a new call edge.
///
/// This is trivial whenever the target is in the same SCC as the source or
/// the edge is an outgoing edge to some descendant SCC. In these cases
/// there is no change to the cyclic structure of SCCs or RefSCCs.
///
/// To further make calling this convenient, it also handles inserting
/// already existing edges.
void insertTrivialCallEdge(Node &SourceN, Node &TargetN);
/// A convenience wrapper around the above to handle trivial cases of
/// inserting a new ref edge.
///
/// This is trivial whenever the target is in the same RefSCC as the source
/// or the edge is an outgoing edge to some descendant RefSCC. In these
/// cases there is no change to the cyclic structure of the RefSCCs.
///
/// To further make calling this convenient, it also handles inserting
/// already existing edges.
void insertTrivialRefEdge(Node &SourceN, Node &TargetN);
/// Directly replace a node's function with a new function.
///
/// This should be used when moving the body and users of a function to
/// a new formal function object but not otherwise changing the call graph
/// structure in any way.
///
/// It requires that the old function in the provided node have zero uses
/// and the new function must have calls and references to it establishing
/// an equivalent graph.
void replaceNodeFunction(Node &N, Function &NewF);
///@}
};
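// --- Editorial sketch: illustrative client code, not part of this header ---
// The two-step mutation pattern documented above for introducing an internal
// call edge: first insert the ref edge (always trivial), then switch it to a
// call edge and observe any SCCs merged away. Assumes both nodes are inside
// RC and no edge between them exists yet; `promoteToCall` is a hypothetical
// helper name.
static bool promoteToCall(LazyCallGraph::RefSCC &RC,
                          LazyCallGraph::Node &SourceN,
                          LazyCallGraph::Node &TargetN) {
  RC.insertInternalRefEdge(SourceN, TargetN);
  // Returns true if collapsing a new cycle merged SCCs into TargetN's SCC.
  return RC.switchInternalEdgeToCall(
      SourceN, TargetN, [](ArrayRef<LazyCallGraph::SCC *> MergedSCCs) {
        // A real caller would invalidate cached per-SCC data here.
        (void)MergedSCCs;
      });
}
// ----------------------------------------------------------------------------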
/// A post-order depth-first RefSCC iterator over the call graph.
///
/// This iterator walks the cached post-order sequence of RefSCCs. However,
/// it trades stability for flexibility. It is restricted to a forward
/// iterator but will survive mutations which insert new RefSCCs and continue
/// to point to the same RefSCC even if it moves in the post-order sequence.
class postorder_ref_scc_iterator
: public iterator_facade_base<postorder_ref_scc_iterator,
std::forward_iterator_tag, RefSCC> {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
/// Nonce type to select the constructor for the end iterator.
struct IsAtEndT {};
LazyCallGraph *G;
RefSCC *RC = nullptr;
/// Build the begin iterator for a node.
postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {}
/// Build the end iterator for a node. This is selected purely by overload.
postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) : G(&G) {}
/// Get the post-order RefSCC at the given index of the postorder walk,
/// populating it if necessary.
static RefSCC *getRC(LazyCallGraph &G, int Index) {
if (Index == (int)G.PostOrderRefSCCs.size())
// We're at the end.
return nullptr;
return G.PostOrderRefSCCs[Index];
}
public:
bool operator==(const postorder_ref_scc_iterator &Arg) const {
return G == Arg.G && RC == Arg.RC;
}
reference operator*() const { return *RC; }
using iterator_facade_base::operator++;
postorder_ref_scc_iterator &operator++() {
assert(RC && "Cannot increment the end iterator!");
RC = getRC(*G, G->RefSCCIndices.find(RC)->second + 1);
return *this;
}
};
/// Construct a graph for the given module.
///
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
LazyCallGraph(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &);
EdgeSequence::iterator begin() { return EntryEdges.begin(); }
EdgeSequence::iterator end() { return EntryEdges.end(); }
void buildRefSCCs();
postorder_ref_scc_iterator postorder_ref_scc_begin() {
if (!EntryEdges.empty())
assert(!PostOrderRefSCCs.empty() &&
"Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this);
}
postorder_ref_scc_iterator postorder_ref_scc_end() {
if (!EntryEdges.empty())
assert(!PostOrderRefSCCs.empty() &&
"Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this,
postorder_ref_scc_iterator::IsAtEndT());
}
iterator_range<postorder_ref_scc_iterator> postorder_ref_sccs() {
return make_range(postorder_ref_scc_begin(), postorder_ref_scc_end());
}
/// Lookup a function in the graph which has already been scanned and added.
Node *lookup(const Function &F) const { return NodeMap.lookup(&F); }
/// Lookup a function's SCC in the graph.
///
/// \returns null if the function hasn't been assigned an SCC via the RefSCC
/// iterator walk.
SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); }
/// Lookup a function's RefSCC in the graph.
///
/// \returns null if the function hasn't been assigned a RefSCC via the
/// RefSCC iterator walk.
RefSCC *lookupRefSCC(Node &N) const {
if (SCC *C = lookupSCC(N))
return &C->getOuterRefSCC();
return nullptr;
}
/// Get a graph node for a given function, scanning it to populate the graph
/// data as necessary.
Node &get(Function &F) {
Node *&N = NodeMap[&F];
if (N)
return *N;
return insertInto(F, N);
}
/// Get the sequence of known and defined library functions.
///
/// These functions, because they are known to LLVM, can have calls
/// introduced out of thin air from arbitrary IR.
ArrayRef<Function *> getLibFunctions() const {
return LibFunctions.getArrayRef();
}
/// Test whether a function is a known and defined library function tracked by
/// the call graph.
///
/// Because these functions are known to LLVM they are specially modeled in
/// the call graph and even when all IR-level references have been removed
/// remain active and reachable.
bool isLibFunction(Function &F) const { return LibFunctions.count(&F); }
///@{
/// \name Pre-SCC Mutation API
///
/// These methods are only valid to call prior to forming any SCCs for this
/// call graph. They can be used to update the core node-graph during
/// a node-based inorder traversal that precedes any SCC-based traversal.
///
/// Once you begin manipulating a call graph's SCCs, most mutation of the
/// graph must be performed via a RefSCC method. There are some exceptions
/// below.
/// Update the call graph after inserting a new edge.
void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Update the call graph after inserting a new edge.
void insertEdge(Function &Source, Function &Target, Edge::Kind EK) {
return insertEdge(get(Source), get(Target), EK);
}
/// Update the call graph after deleting an edge.
void removeEdge(Node &SourceN, Node &TargetN);
/// Update the call graph after deleting an edge.
void removeEdge(Function &Source, Function &Target) {
return removeEdge(get(Source), get(Target));
}
///@}
///@{
/// \name General Mutation API
///
/// There is a very limited set of mutations allowed on the graph as a whole
/// once SCCs have started to be formed. These routines have strict contracts
/// but may be called at any point.
/// Remove a dead function from the call graph (typically to delete it).
///
/// Note that the function must have an empty use list, and the call graph
/// must be up-to-date prior to calling this. That means it is by itself in
/// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural
/// changes result from calling this routine other than potentially removing
/// entry points into the call graph.
///
/// If SCC formation has begun, the function must not be part of the current
/// DFS for this to be called safely. Typically, the function will have been
/// fully visited by the DFS prior to calling this routine.
void removeDeadFunction(Function &F);
/// Add a new function split/outlined from an existing function.
///
/// The new function may only reference other functions that the original
/// function did.
///
/// The original function must reference (either directly or indirectly) the
/// new function.
///
/// The new function may also reference the original function.
/// It may end up in a parent SCC in the case that the original function's
/// edge to the new function is a ref edge, and the edge back is a call edge.
void addSplitFunction(Function &OriginalFunction, Function &NewFunction);
/// Add new ref-recursive functions split/outlined from an existing function.
///
/// The new functions may only reference other functions that the original
/// function did. The new functions may reference (not call) the original
/// function.
///
/// The original function must reference (not call) all new functions.
/// All new functions must reference (not call) each other.
void addSplitRefRecursiveFunctions(Function &OriginalFunction,
ArrayRef<Function *> NewFunctions);
///@}
///@{
/// \name Static helpers for code doing updates to the call graph.
///
/// These helpers are used to implement parts of the call graph but are also
/// useful to code doing updates or otherwise wanting to walk the IR in the
/// same patterns as when we build the call graph.
/// Recursively visits the defined functions whose address is reachable from
/// every constant in the \p Worklist.
///
/// Doesn't recurse through any constants already in the \p Visited set, and
/// updates that set with every constant visited.
///
/// For each defined function, calls \p Callback with that function.
template <typename CallbackT>
static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
SmallPtrSetImpl<Constant *> &Visited,
CallbackT Callback) {
while (!Worklist.empty()) {
Constant *C = Worklist.pop_back_val();
if (Function *F = dyn_cast<Function>(C)) {
if (!F->isDeclaration())
Callback(*F);
continue;
}
// The blockaddress constant expression is a weird special case; we can't
// generically walk its operands the way we do for all other constants.
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
// If we've already visited the function referred to by the block
// address, we don't need to revisit it.
if (Visited.count(BA->getFunction()))
continue;
// If all of the blockaddress' users are instructions within the
// referred to function, we don't need to insert a cycle.
if (llvm::all_of(BA->users(), [&](User *U) {
if (Instruction *I = dyn_cast<Instruction>(U))
return I->getFunction() == BA->getFunction();
return false;
}))
continue;
// Otherwise we should go visit the referred to function.
Visited.insert(BA->getFunction());
Worklist.push_back(BA->getFunction());
continue;
}
for (Value *Op : C->operand_values())
if (Visited.insert(cast<Constant>(Op)).second)
Worklist.push_back(cast<Constant>(Op));
}
}
///@}
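// --- Editorial sketch: illustrative client code, not part of this header ---
// Using the static visitReferences helper above to find every defined
// function whose address is reachable from a single seed constant (for
// example a global initializer). `collectReferencedFunctions` is a
// hypothetical helper name.
static void collectReferencedFunctions(Constant &Seed,
                                       SmallVectorImpl<Function *> &Out) {
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  Worklist.push_back(&Seed);
  Visited.insert(&Seed);
  LazyCallGraph::visitReferences(Worklist, Visited,
                                 [&](Function &F) { Out.push_back(&F); });
}
// ----------------------------------------------------------------------------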
private:
using node_stack_iterator = SmallVectorImpl<Node *>::reverse_iterator;
using node_stack_range = iterator_range<node_stack_iterator>;
/// Allocator that holds all the call graph nodes.
SpecificBumpPtrAllocator<Node> BPA;
/// Maps function->node for fast lookup.
DenseMap<const Function *, Node *> NodeMap;
/// The entry edges into the graph.
///
/// These edges are from "external" sources. Put another way, they
/// escape at the module scope.
EdgeSequence EntryEdges;
/// Allocator that holds all the call graph SCCs.
SpecificBumpPtrAllocator<SCC> SCCBPA;
/// Maps Function -> SCC for fast lookup.
DenseMap<Node *, SCC *> SCCMap;
/// Allocator that holds all the call graph RefSCCs.
SpecificBumpPtrAllocator<RefSCC> RefSCCBPA;
/// The post-order sequence of RefSCCs.
///
/// This list is lazily formed the first time we walk the graph.
SmallVector<RefSCC *, 16> PostOrderRefSCCs;
/// A map from RefSCC to the index for it in the postorder sequence of
/// RefSCCs.
DenseMap<RefSCC *, int> RefSCCIndices;
/// Defined functions that are also known library functions which the
/// optimizer can reason about and therefore might introduce calls to out of
/// thin air.
SmallSetVector<Function *, 4> LibFunctions;
/// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
/// Helper to initialize a new node created outside of creating SCCs and add
/// it to the NodeMap if necessary. For example, useful when a function is
/// split.
Node &initNode(Function &F);
/// Helper to update pointers back to the graph object during moves.
void updateGraphPtrs();
/// Allocates an SCC and constructs it using the graph allocator.
///
/// The arguments are forwarded to the constructor.
template <typename... Ts> SCC *createSCC(Ts &&... Args) {
return new (SCCBPA.Allocate()) SCC(std::forward<Ts>(Args)...);
}
/// Allocates a RefSCC and constructs it using the graph allocator.
///
/// The arguments are forwarded to the constructor.
template <typename... Ts> RefSCC *createRefSCC(Ts &&... Args) {
return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
}
/// Common logic for building SCCs from a sequence of roots.
///
/// This is a very generic implementation of the depth-first walk and SCC
/// formation algorithm. It uses a generic sequence of roots and generic
/// callbacks for each step. This is designed to be used to implement both
/// the RefSCC formation and SCC formation with shared logic.
///
/// Currently this is a relatively naive implementation of Tarjan's DFS
/// algorithm to form the SCCs.
///
/// FIXME: We should consider newer variants such as Nuutila.
template <typename RootsT, typename GetBeginT, typename GetEndT,
typename GetNodeT, typename FormSCCCallbackT>
static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
GetEndT &&GetEnd, GetNodeT &&GetNode,
FormSCCCallbackT &&FormSCC);
/// Build the SCCs for a RefSCC out of a list of nodes.
void buildSCCs(RefSCC &RC, node_stack_range Nodes);
/// Get the index of a RefSCC within the postorder traversal.
///
/// Requires that this RefSCC is a valid one in the (perhaps partial)
/// postorder traversed part of the graph.
int getRefSCCIndex(RefSCC &RC) {
auto IndexIt = RefSCCIndices.find(&RC);
assert(IndexIt != RefSCCIndices.end() && "RefSCC doesn't have an index!");
assert(PostOrderRefSCCs[IndexIt->second] == &RC &&
"Index does not point back at RC!");
return IndexIt->second;
}
};
inline LazyCallGraph::Edge::Edge() : Value() {}
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
inline LazyCallGraph::Edge::operator bool() const {
return Value.getPointer() && !Value.getPointer()->isDead();
}
inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
assert(*this && "Queried a null edge!");
return Value.getInt();
}
inline bool LazyCallGraph::Edge::isCall() const {
assert(*this && "Queried a null edge!");
return getKind() == Call;
}
inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const {
assert(*this && "Queried a null edge!");
return *Value.getPointer();
}
inline Function &LazyCallGraph::Edge::getFunction() const {
assert(*this && "Queried a null edge!");
return getNode().getFunction();
}
// Provide GraphTraits specializations for call graphs.
template <> struct GraphTraits<LazyCallGraph::Node *> {
using NodeRef = LazyCallGraph::Node *;
using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator;
static NodeRef getEntryNode(NodeRef N) { return N; }
static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
template <> struct GraphTraits<LazyCallGraph *> {
using NodeRef = LazyCallGraph::Node *;
using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator;
static NodeRef getEntryNode(NodeRef N) { return N; }
static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
/// An analysis pass which computes the call graph for a module.
class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> {
friend AnalysisInfoMixin<LazyCallGraphAnalysis>;
static AnalysisKey Key;
public:
/// Inform generic clients of the result type.
using Result = LazyCallGraph;
/// Compute the \c LazyCallGraph for the module \c M.
///
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
return LazyCallGraph(M, GetTLI);
}
};
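// --- Editorial sketch: illustrative client code, not part of this header ---
// How a new-pass-manager module pass would typically obtain and walk the
// analysis result declared above. `ExamplePrintSCCsPass` is a hypothetical
// pass; it builds the RefSCC postorder before iterating it, as required by
// postorder_ref_scc_begin(), and assumes the usual pass headers are included
// by the client.
struct ExamplePrintSCCsPass : PassInfoMixin<ExamplePrintSCCsPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
    LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
    CG.buildRefSCCs();
    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
      for (LazyCallGraph::SCC &C : RC)
        errs() << C.getName() << "\n"; // Bottom-up over SCCs in each RefSCC.
    return PreservedAnalyses::all();
  }
};
// ----------------------------------------------------------------------------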
/// A pass which prints the call graph to a \c raw_ostream.
///
/// This is primarily useful for testing the analysis.
class LazyCallGraphPrinterPass
: public PassInfoMixin<LazyCallGraphPrinterPass> {
raw_ostream &OS;
public:
explicit LazyCallGraphPrinterPass(raw_ostream &OS);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// A pass which prints the call graph as a DOT file to a \c raw_ostream.
///
/// This is primarily useful for visualization purposes.
class LazyCallGraphDOTPrinterPass
: public PassInfoMixin<LazyCallGraphDOTPrinterPass> {
raw_ostream &OS;
public:
explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_ANALYSIS_LAZYCALLGRAPH_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
index 164ec50e47bc..5983f98d84cf 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h
@@ -1,1356 +1,1356 @@
//===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the LoopInfo class that is used to identify natural loops
// and determine the loop depth of various nodes of the CFG. A natural loop
// has exactly one entry-point, which is called the header. Note that natural
// loops may actually be several loops that share the same header node.
//
// This analysis calculates the nesting structure of loops in a function. For
// each natural loop identified, this analysis identifies natural loops
// contained entirely within the loop and the basic blocks that make up the loop.
//
// It can calculate on the fly various bits of information, for example:
//
// * whether there is a preheader for the loop
// * the number of back edges to the header
// * whether or not a particular block branches out of the loop
// * the successor blocks of the loop
// * the loop depth
// * etc...
//
// Note that this analysis specifically identifies *Loops* not cycles or SCCs
// in the CFG. There can be strongly connected components in the CFG which
// this analysis will not recognize and that will not be represented by a Loop
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
//
// For an overview of terminology used in this API (and thus all of our loop
// analyses or transforms), see docs/LoopTerminology.rst.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPINFO_H
#define LLVM_ANALYSIS_LOOPINFO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include <algorithm>
#include <utility>
namespace llvm {
class DominatorTree;
class LoopInfo;
class Loop;
class InductionDescriptor;
class MDNode;
class MemorySSAUpdater;
class ScalarEvolution;
class raw_ostream;
template <class N, bool IsPostDom> class DominatorTreeBase;
template <class N, class M> class LoopInfoBase;
template <class N, class M> class LoopBase;
//===----------------------------------------------------------------------===//
/// Instances of this class are used to represent loops that are detected in the
/// flow graph.
///
template <class BlockT, class LoopT> class LoopBase {
LoopT *ParentLoop;
// Loops contained entirely within this one.
std::vector<LoopT *> SubLoops;
// The list of blocks in this loop. First entry is the header node.
std::vector<BlockT *> Blocks;
SmallPtrSet<const BlockT *, 8> DenseBlockSet;
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
/// Indicator that this loop is no longer a valid loop.
bool IsInvalid = false;
#endif
LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
const LoopBase<BlockT, LoopT> &
operator=(const LoopBase<BlockT, LoopT> &) = delete;
public:
/// Return the nesting level of this loop. An outer-most loop has depth 1,
/// for consistency with loop depth values used for basic blocks, where depth
/// 0 is used for blocks not inside any loops.
unsigned getLoopDepth() const {
assert(!isInvalid() && "Loop not in a valid state!");
unsigned D = 1;
for (const LoopT *CurLoop = ParentLoop; CurLoop;
CurLoop = CurLoop->ParentLoop)
++D;
return D;
}
BlockT *getHeader() const { return getBlocks().front(); }
/// Return the parent loop if it exists or nullptr for top
/// level loops.
/// A loop is either top-level in a function (that is, it is not
/// contained in any other loop) or it is entirely enclosed in
/// some other loop.
/// If a loop is top-level, it has no parent, otherwise its
/// parent is the innermost loop in which it is enclosed.
LoopT *getParentLoop() const { return ParentLoop; }
/// This is a raw interface for bypassing addChildLoop.
void setParentLoop(LoopT *L) {
assert(!isInvalid() && "Loop not in a valid state!");
ParentLoop = L;
}
/// Return true if the specified loop is contained within this loop.
bool contains(const LoopT *L) const {
assert(!isInvalid() && "Loop not in a valid state!");
if (L == this)
return true;
if (!L)
return false;
return contains(L->getParentLoop());
}
/// Return true if the specified basic block is in this loop.
bool contains(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet.count(BB);
}
/// Return true if the specified instruction is in this loop.
template <class InstT> bool contains(const InstT *Inst) const {
return contains(Inst->getParent());
}
/// Return the loops contained entirely within this loop.
const std::vector<LoopT *> &getSubLoops() const {
assert(!isInvalid() && "Loop not in a valid state!");
return SubLoops;
}
std::vector<LoopT *> &getSubLoopsVector() {
assert(!isInvalid() && "Loop not in a valid state!");
return SubLoops;
}
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef
typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
iterator begin() const { return getSubLoops().begin(); }
iterator end() const { return getSubLoops().end(); }
reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
reverse_iterator rend() const { return getSubLoops().rend(); }
// LoopInfo does not detect irreducible control flow, just natural
// loops. That is, it is possible that there is cyclic control
// flow within the "innermost loop" or around the "outermost
// loop".
/// Return true if the loop does not contain any (natural) loops.
bool isInnermost() const { return getSubLoops().empty(); }
/// Return true if the loop does not have a parent (natural) loop
/// (i.e. it is outermost, which is the same as top-level).
bool isOutermost() const { return getParentLoop() == nullptr; }
/// Get a list of the basic blocks which make up this loop.
ArrayRef<BlockT *> getBlocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks;
}
typedef typename ArrayRef<BlockT *>::const_iterator block_iterator;
block_iterator block_begin() const { return getBlocks().begin(); }
block_iterator block_end() const { return getBlocks().end(); }
inline iterator_range<block_iterator> blocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return make_range(block_begin(), block_end());
}
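// --- Editorial sketch: illustrative client code, not part of this header ---
// Walking the accessors above on the concrete llvm::Loop instantiation: the
// header is always the first block, and every block is also a member of all
// enclosing loops. `summarizeLoop` is a hypothetical helper; client code
// would also #include "llvm/Support/raw_ostream.h".
static void summarizeLoop(const Loop &L, raw_ostream &OS) {
  OS << "depth " << L.getLoopDepth() << ", header "
     << L.getHeader()->getName() << ", " << L.getNumBlocks() << " block(s), "
     << L.getSubLoops().size() << " direct subloop(s)\n";
  for (const BasicBlock *BB : L.blocks())
    OS << "  " << BB->getName() << "\n";
}
// ----------------------------------------------------------------------------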
/// Get the number of blocks in this loop in constant time.
unsigned getNumBlocks() const {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks.size();
}
/// Return a direct, mutable handle to the blocks vector so that we can
/// mutate it efficiently with techniques like `std::remove`.
std::vector<BlockT *> &getBlocksVector() {
assert(!isInvalid() && "Loop not in a valid state!");
return Blocks;
}
/// Return a direct, mutable handle to the blocks set so that we can
/// mutate it efficiently.
SmallPtrSetImpl<const BlockT *> &getBlocksSet() {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet;
}
/// Return a direct, immutable handle to the blocks set.
const SmallPtrSetImpl<const BlockT *> &getBlocksSet() const {
assert(!isInvalid() && "Loop not in a valid state!");
return DenseBlockSet;
}
/// Return true if this loop is no longer valid. The only valid use of this
/// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to
/// true by the destructor. In other words, if this accessor returns true,
/// the caller has already triggered UB by calling this accessor; and so it
/// can only be called in a context where a return value of true indicates a
/// programmer error.
bool isInvalid() const {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
return IsInvalid;
#else
return false;
#endif
}
/// True if the terminator in the block can branch to another block that is
/// outside of the current loop. \p BB must be inside the loop.
bool isLoopExiting(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
assert(contains(BB) && "Exiting block must be part of the loop");
for (const auto *Succ : children<const BlockT *>(BB)) {
if (!contains(Succ))
return true;
}
return false;
}
/// Returns true if \p BB is a loop-latch.
/// A latch block is a block that contains a branch back to the header.
/// This function is useful when there are multiple latches in a loop
/// because \fn getLoopLatch will return nullptr in that case.
bool isLoopLatch(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
assert(contains(BB) && "block does not belong to the loop");
BlockT *Header = getHeader();
auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header);
auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header);
return std::find(PredBegin, PredEnd, BB) != PredEnd;
}
/// Calculate the number of back edges to the loop header.
unsigned getNumBackEdges() const {
assert(!isInvalid() && "Loop not in a valid state!");
unsigned NumBackEdges = 0;
BlockT *H = getHeader();
for (const auto Pred : children<Inverse<BlockT *>>(H))
if (contains(Pred))
++NumBackEdges;
return NumBackEdges;
}
//===--------------------------------------------------------------------===//
// APIs for simple analysis of the loop.
//
// Note that all of these methods can fail on general loops (ie, there may not
// be a preheader, etc). For best success, the loop simplification and
// induction variable canonicalization pass should be used to normalize loops
// for easy analysis. These methods assume canonical loops.
/// Return all blocks inside the loop that have successors outside of the
/// loop. These are the blocks _inside of the current loop_ which branch out.
/// The returned list is always unique.
void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const;
/// If getExitingBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getExitingBlock() const;
/// Return all of the successor blocks of this loop. These are the blocks
/// _outside of the current loop_ which are branched to.
void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// If getExitBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getExitBlock() const;
/// Return true if no exit block for the loop has a predecessor that is
/// outside the loop.
bool hasDedicatedExits() const;
/// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// Return all unique successor blocks of this loop, except that successors
/// from the latch block are not considered. If an exit block reached from
/// the latch also has a non-latch predecessor inside the loop, it will be
/// added to ExitBlocks.
/// These are the blocks _outside of the current loop_ which are branched to.
void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
/// If getUniqueExitBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getUniqueExitBlock() const;
/// Return true if this loop does not have any exit blocks.
bool hasNoExitBlocks() const;
/// Edge type.
typedef std::pair<BlockT *, BlockT *> Edge;
/// Return all pairs of (_inside_block_,_outside_block_).
void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
/// If there is a preheader for this loop, return it. A loop has a preheader
/// if there is only one edge to the header of the loop from outside of the
/// loop. If this is the case, the block branching to the header of the loop
/// is the preheader node.
///
/// This method returns null if there is no preheader for the loop.
BlockT *getLoopPreheader() const;
/// If the given loop's header has exactly one unique predecessor outside the
/// loop, return it. Otherwise return null.
/// This is less strict than the loop "preheader" concept, which requires
/// the predecessor to have exactly one successor.
BlockT *getLoopPredecessor() const;
/// If there is a single latch block for this loop, return it.
/// A latch block is a block that contains a branch back to the header.
BlockT *getLoopLatch() const;
/// Return all loop latch blocks of this loop. A latch block is a block that
/// contains a branch back to the header.
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
assert(!isInvalid() && "Loop not in a valid state!");
BlockT *H = getHeader();
for (const auto Pred : children<Inverse<BlockT *>>(H))
if (contains(Pred))
LoopLatches.push_back(Pred);
}
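// --- Editorial sketch: illustrative client code, not part of this header ---
// Querying the canonical-form helpers documented above. Each of the
// preheader/latch queries may return null for loops that have not been
// simplified, so the sketch checks. `describeLoopShape` is a hypothetical
// helper; client code would also #include "llvm/Support/raw_ostream.h".
static void describeLoopShape(const Loop &L, raw_ostream &OS) {
  if (BasicBlock *Preheader = L.getLoopPreheader())
    OS << "preheader: " << Preheader->getName() << "\n";
  if (BasicBlock *Latch = L.getLoopLatch())
    OS << "single latch: " << Latch->getName() << "\n";
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  OS << ExitingBlocks.size() << " exiting block(s), "
     << L.getNumBackEdges() << " back edge(s)\n";
}
// ----------------------------------------------------------------------------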
/// Return all inner loops in the loop nest rooted by the loop in preorder,
/// with siblings in forward program order.
template <class Type>
static void getInnerLoopsInPreorder(const LoopT &L,
SmallVectorImpl<Type> &PreOrderLoops) {
SmallVector<LoopT *, 4> PreOrderWorklist;
PreOrderWorklist.append(L.rbegin(), L.rend());
while (!PreOrderWorklist.empty()) {
LoopT *L = PreOrderWorklist.pop_back_val();
// Sub-loops are stored in forward program order, but the worklist is
// processed backwards, so append them in reverse order.
PreOrderWorklist.append(L->rbegin(), L->rend());
PreOrderLoops.push_back(L);
}
}
/// Return all loops in the loop nest rooted by the loop in preorder, with
/// siblings in forward program order.
SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
SmallVector<const LoopT *, 4> PreOrderLoops;
const LoopT *CurLoop = static_cast<const LoopT *>(this);
PreOrderLoops.push_back(CurLoop);
getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
return PreOrderLoops;
}
SmallVector<LoopT *, 4> getLoopsInPreorder() {
SmallVector<LoopT *, 4> PreOrderLoops;
LoopT *CurLoop = static_cast<LoopT *>(this);
PreOrderLoops.push_back(CurLoop);
getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
return PreOrderLoops;
}
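// For example, visiting every loop in the nest rooted at \c L, outermost
// first (a sketch, assuming \c L is a `Loop *` and llvm::errs() is in scope):
//
//   for (Loop *SubL : L->getLoopsInPreorder())
//     errs() << SubL->getName() << " at depth " << SubL->getLoopDepth() << "\n";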
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
//
/// This method is used by other analyses to update loop information.
/// NewBB is set to be a new member of the current loop.
/// Because of this, it is added as a member of all parent loops, and is
/// recorded in the specified LoopInfo object as belonging to the current
/// loop. It is not valid to replace the loop header with this method.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
/// This is used when splitting loops up. It replaces the OldChild entry in
/// our children list with NewChild, and updates the parent pointer of
/// OldChild to null and that of NewChild to this loop.
/// This updates the loop depth of the new child.
void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild);
/// Add the specified loop to be a child of this loop.
/// This updates the loop depth of the new child.
void addChildLoop(LoopT *NewChild) {
assert(!isInvalid() && "Loop not in a valid state!");
assert(!NewChild->ParentLoop && "NewChild already has a parent!");
NewChild->ParentLoop = static_cast<LoopT *>(this);
SubLoops.push_back(NewChild);
}
/// This removes the specified child from being a subloop of this loop. The
/// loop is not deleted, as it will presumably be inserted into another loop.
LoopT *removeChildLoop(iterator I) {
assert(!isInvalid() && "Loop not in a valid state!");
assert(I != SubLoops.end() && "Cannot remove end iterator!");
LoopT *Child = *I;
assert(Child->ParentLoop == this && "Child is not a child of this loop!");
SubLoops.erase(SubLoops.begin() + (I - begin()));
Child->ParentLoop = nullptr;
return Child;
}
/// This removes the specified child from being a subloop of this loop. The
/// loop is not deleted, as it will presumably be inserted into another loop.
LoopT *removeChildLoop(LoopT *Child) {
return removeChildLoop(llvm::find(*this, Child));
}
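// For instance, re-parenting a subloop; a sketch, assuming \c Parent,
// \c NewParent and \c Child are `Loop *` and \c Child is currently a subloop
// of \c Parent:
//
//   Loop *Detached = Parent->removeChildLoop(Child);
//   NewParent->addChildLoop(Detached);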
/// This adds a basic block directly to the basic block list.
/// This should only be used by transformations that create new loops. Other
/// transformations should use addBasicBlockToLoop.
void addBlockEntry(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
Blocks.push_back(BB);
DenseBlockSet.insert(BB);
}
/// Interface to reverse Blocks[from, end of loop] in this loop.
void reverseBlock(unsigned from) {
assert(!isInvalid() && "Loop not in a valid state!");
std::reverse(Blocks.begin() + from, Blocks.end());
}
/// Interface to do reserve() for Blocks.
void reserveBlocks(unsigned size) {
assert(!isInvalid() && "Loop not in a valid state!");
Blocks.reserve(size);
}
/// This method is used to move BB (which must be part of this loop) to be the
/// loop header of the loop (the block that dominates all others).
void moveToHeader(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
if (Blocks[0] == BB)
return;
for (unsigned i = 0;; ++i) {
assert(i != Blocks.size() && "Loop does not contain BB!");
if (Blocks[i] == BB) {
Blocks[i] = Blocks[0];
Blocks[0] = BB;
return;
}
}
}
/// This removes the specified basic block from the current loop, updating the
/// Blocks as appropriate. This does not update the mapping in the LoopInfo
/// class.
void removeBlockFromLoop(BlockT *BB) {
assert(!isInvalid() && "Loop not in a valid state!");
auto I = find(Blocks, BB);
assert(I != Blocks.end() && "BB is not in this loop's block list!");
Blocks.erase(I);
DenseBlockSet.erase(BB);
}
/// Verify loop structure
void verifyLoop() const;
/// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
/// Returns true if the loop is annotated parallel.
///
/// Derived classes can override this method using static template
/// polymorphism.
bool isAnnotatedParallel() const { return false; }
/// Print loop with all the BBs inside it.
void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true,
unsigned Depth = 0) const;
protected:
friend class LoopInfoBase<BlockT, LoopT>;
/// This creates an empty loop.
LoopBase() : ParentLoop(nullptr) {}
explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) {
Blocks.push_back(BB);
DenseBlockSet.insert(BB);
}
// Since loop passes like SCEV are allowed to key analysis results off of
// `Loop` pointers, we cannot re-use pointers within a loop pass manager.
// This means loop passes should not be `delete` ing `Loop` objects directly
// (and risk a later `Loop` allocation re-using the address of a previous one)
// but should be using LoopInfo::markAsRemoved, which keeps around the `Loop`
// pointer till the end of the lifetime of the `LoopInfo` object.
//
// To make it easier to follow this rule, we mark the destructor as
// non-public.
~LoopBase() {
for (auto *SubLoop : SubLoops)
SubLoop->~LoopT();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
IsInvalid = true;
#endif
SubLoops.clear();
Blocks.clear();
DenseBlockSet.clear();
ParentLoop = nullptr;
}
};
template <class BlockT, class LoopT>
raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
Loop.print(OS);
return OS;
}
// Implementation in LoopInfoImpl.h
extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
/// in the CFG are necessarily loops.
-class Loop : public LoopBase<BasicBlock, Loop> {
+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
public:
/// A range representing the start and end location of a loop.
class LocRange {
DebugLoc Start;
DebugLoc End;
public:
LocRange() {}
LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
const DebugLoc &getStart() const { return Start; }
const DebugLoc &getEnd() const { return End; }
/// Check for null.
///
explicit operator bool() const { return Start && End; }
};
/// Return true if the specified value is loop invariant.
bool isLoopInvariant(const Value *V) const;
/// Return true if all the operands of the specified instruction are loop
/// invariant.
bool hasLoopInvariantOperands(const Instruction *I) const;
/// If the given value is an instruction inside of the loop and it can be
/// hoisted, do so to make it trivially loop-invariant.
/// Return true if the value after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
///
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
bool makeLoopInvariant(Value *V, bool &Changed,
Instruction *InsertPt = nullptr,
MemorySSAUpdater *MSSAU = nullptr) const;
/// If the given instruction is inside of the loop and it can be hoisted, do
/// so to make it trivially loop-invariant.
/// Return true if the instruction after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
///
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
///
bool makeLoopInvariant(Instruction *I, bool &Changed,
Instruction *InsertPt = nullptr,
MemorySSAUpdater *MSSAU = nullptr) const;
/// Check to see if the loop has a canonical induction variable: an integer
/// recurrence that starts at 0 and increments by one each time through the
/// loop. If so, return the phi node that corresponds to it.
///
/// The IndVarSimplify pass transforms loops to have a canonical induction
/// variable.
///
PHINode *getCanonicalInductionVariable() const;
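// A sketch of typical use (assuming \c L is a `Loop *` and
// \c analyzeInductionUses is a hypothetical helper):
//
//   if (PHINode *IndVar = L->getCanonicalInductionVariable())
//     analyzeInductionUses(IndVar); // IndVar starts at 0 and steps by 1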
/// Get the latch condition instruction.
ICmpInst *getLatchCmpInst() const;
/// Obtain the unique incoming and back edge. Return false if they are
/// non-unique or the loop is dead; otherwise, return true.
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
BasicBlock *&Backedge) const;
/// Below are some utilities to get the loop guard, loop bounds and induction
/// variable, and to check if a given phinode is an auxiliary induction
/// variable, if the loop is guarded, and if the loop is canonical.
///
/// Here is an example:
/// \code
/// for (int i = lb; i < ub; i+=step)
/// <loop body>
/// --- pseudo LLVMIR ---
/// beforeloop:
/// guardcmp = (lb < ub)
/// if (guardcmp) goto preheader; else goto afterloop
/// preheader:
/// loop:
/// i_1 = phi[{lb, preheader}, {i_2, latch}]
/// <loop body>
/// i_2 = i_1 + step
/// latch:
/// cmp = (i_2 < ub)
/// if (cmp) goto loop
/// exit:
/// afterloop:
/// \endcode
///
/// - getBounds
/// - getInitialIVValue --> lb
/// - getStepInst --> i_2 = i_1 + step
/// - getStepValue --> step
/// - getFinalIVValue --> ub
/// - getCanonicalPredicate --> '<'
/// - getDirection --> Increasing
///
/// - getInductionVariable --> i_1
/// - isAuxiliaryInductionVariable(x) --> true if x == i_1
/// - getLoopGuardBranch()
/// --> `if (guardcmp) goto preheader; else goto afterloop`
/// - isGuarded() --> true
/// - isCanonical --> false
struct LoopBounds {
/// Return the LoopBounds object if
/// - the given \p IndVar is an induction variable
/// - the initial value of the induction variable can be found
/// - the step instruction of the induction variable can be found
/// - the final value of the induction variable can be found
///
/// Else None.
static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
ScalarEvolution &SE);
/// Get the initial value of the loop induction variable.
Value &getInitialIVValue() const { return InitialIVValue; }
/// Get the instruction that updates the loop induction variable.
Instruction &getStepInst() const { return StepInst; }
/// Get the step that the loop induction variable gets updated by in each
/// loop iteration. Return nullptr if not found.
Value *getStepValue() const { return StepValue; }
/// Get the final value of the loop induction variable.
Value &getFinalIVValue() const { return FinalIVValue; }
/// Return the canonical predicate for the latch compare instruction, if
/// able to be calculated. Else BAD_ICMP_PREDICATE.
///
/// A predicate is considered as canonical if requirements below are all
/// satisfied:
/// 1. The first successor of the latch branch is the loop header
/// If not, invert the predicate.
/// 2. One of the operands of the latch comparison is StepInst
/// If not, and
/// - if the current calculated predicate is not ne or eq, flip the
/// predicate.
/// - else if the loop is increasing, return slt
/// (notice that it is safe to change from ne or eq to sign compare)
/// - else if the loop is decreasing, return sgt
/// (notice that it is safe to change from ne or eq to sign compare)
///
/// Here is an example when both (1) and (2) are not satisfied:
/// \code
/// loop.header:
/// %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
/// %inc = add %iv, %step
/// %cmp = slt %iv, %finaliv
/// br %cmp, %loop.exit, %loop.header
/// loop.exit:
/// \endcode
/// - The second successor of the latch branch is the loop header instead
/// of the first successor (slt -> sge)
/// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
/// instead of the StepInst (%inc) (sge -> sgt)
///
/// The predicate would be sgt if both (1) and (2) are satisfied.
/// getCanonicalPredicate() returns sgt for this example.
/// Note: The IR is not changed.
ICmpInst::Predicate getCanonicalPredicate() const;
/// An enum for the direction of the loop
/// - for (int i = 0; i < ub; ++i) --> Increasing
/// - for (int i = ub; i > 0; --i) --> Decreasing
/// - for (int i = x; i != y; i+=z) --> Unknown
enum class Direction { Increasing, Decreasing, Unknown };
/// Get the direction of the loop.
Direction getDirection() const;
private:
LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
ScalarEvolution &SE)
: L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
FinalIVValue(F), SE(SE) {}
const Loop &L;
// The initial value of the loop induction variable
Value &InitialIVValue;
// The instruction that updates the loop induction variable
Instruction &StepInst;
// The value that the loop induction variable gets updated by in each loop
// iteration
Value *StepValue;
// The final value of the loop induction variable
Value &FinalIVValue;
ScalarEvolution &SE;
};
/// Return the struct LoopBounds collected if all struct members are found,
/// else None.
Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
/// Return the loop induction variable if found, else return nullptr.
/// An instruction is considered as the loop induction variable if
/// - it is an induction variable of the loop; and
/// - it is used to determine the condition of the branch in the loop latch
///
/// Note: the induction variable doesn't need to be canonical, i.e. starts at
/// zero and increments by one each time through the loop (but it can be).
PHINode *getInductionVariable(ScalarEvolution &SE) const;
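// For example (a sketch, assuming \c L is a `const Loop *`, \c SE is a
// `ScalarEvolution &`, and llvm::errs() is in scope):
//
//   if (PHINode *IV = L->getInductionVariable(SE))
//     if (Optional<Loop::LoopBounds> Bounds = L->getBounds(SE))
//       errs() << "IV " << IV->getName() << " starts at "
//              << Bounds->getInitialIVValue() << "\n";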
/// Get the loop induction descriptor for the loop induction variable. Return
/// true if the loop induction variable is found.
bool getInductionDescriptor(ScalarEvolution &SE,
InductionDescriptor &IndDesc) const;
/// Return true if the given PHINode \p AuxIndVar is
/// - in the loop header
/// - not used outside of the loop
/// - incremented by a loop invariant step for each loop iteration
/// - step instruction opcode should be add or sub
/// Note: auxiliary induction variable is not required to be used in the
/// conditional branch in the loop latch. (but it can be)
bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
ScalarEvolution &SE) const;
/// Return the loop guard branch, if it exists.
///
/// This currently only works on simplified loops, as it requires a preheader
/// and a latch to identify the guard. It will work on loops of the form:
/// \code
/// GuardBB:
/// br cond1, Preheader, ExitSucc <== GuardBranch
/// Preheader:
/// br Header
/// Header:
/// ...
/// br Latch
/// Latch:
/// br cond2, Header, ExitBlock
/// ExitBlock:
/// br ExitSucc
/// ExitSucc:
/// \endcode
BranchInst *getLoopGuardBranch() const;
/// Return true iff the loop is
/// - in simplified, rotated form, and
/// - guarded by a loop guard branch.
bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
/// Return true if the loop is in rotated form.
///
/// This does not check whether the loop was rotated by the loop rotation
/// pass; it only checks whether the loop is in rotated form (has a valid
/// latch that exits the loop).
bool isRotatedForm() const {
assert(!isInvalid() && "Loop not in a valid state!");
BasicBlock *Latch = getLoopLatch();
return Latch && isLoopExiting(Latch);
}
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
/// Return true if the Loop is in LCSSA form.
bool isLCSSAForm(const DominatorTree &DT) const;
/// Return true if this Loop and all inner subloops are in LCSSA form.
bool isRecursivelyLCSSAForm(const DominatorTree &DT,
const LoopInfo &LI) const;
/// Return true if the Loop is in the form that the LoopSimplify form
/// transforms loops to, which is sometimes called normal form.
bool isLoopSimplifyForm() const;
/// Return true if the loop body is safe to clone in practice.
bool isSafeToClone() const;
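// Transformations commonly bail out early when the required forms are
// missing; a sketch, assuming \c L is a `Loop *` and \c DT is a
// `DominatorTree &`:
//
//   if (!L->isLoopSimplifyForm() || !L->isLCSSAForm(DT) || !L->isSafeToClone())
//     return false;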
/// Returns true if the loop is annotated parallel.
///
/// A parallel loop can be assumed to not contain any dependencies between
/// iterations by the compiler. That is, any loop-carried dependency checking
/// can be skipped completely when parallelizing the loop on the target
/// machine. Thus, if the parallel loop information originates from the
/// programmer, e.g. via the OpenMP parallel for pragma, it is the
/// programmer's responsibility to ensure there are no loop-carried
/// dependencies. The final execution order of the instructions across
/// iterations is not guaranteed, thus, the end result might or might not
/// implement actual concurrent execution of instructions across multiple
/// iterations.
bool isAnnotatedParallel() const;
/// Return the llvm.loop loop id metadata node for this loop if it is present.
///
/// If this loop contains the same llvm.loop metadata on each branch to the
/// header then the node is returned. If any latch instruction does not
/// contain llvm.loop or if multiple latches contain different nodes then
/// 0 is returned.
MDNode *getLoopID() const;
/// Set the llvm.loop loop id metadata for this loop.
///
/// The LoopID metadata node will be added to each terminator instruction in
/// the loop that branches to the loop header.
///
/// The LoopID metadata node should have one or more operands and the first
/// operand should be the node itself.
void setLoopID(MDNode *LoopID) const;
/// Add llvm.loop.unroll.disable to this loop's loop id metadata.
///
/// Remove existing unroll metadata and add unroll disable metadata to
/// indicate the loop has already been unrolled. This prevents a loop
/// from being unrolled more than is directed by a pragma if the loop
/// unrolling pass is run more than once (which it generally is).
void setLoopAlreadyUnrolled();
/// Add llvm.loop.mustprogress to this loop's loop id metadata.
void setLoopMustProgress();
void dump() const;
void dumpVerbose() const;
/// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
/// location by looking at the preheader and header blocks. If it
/// cannot find a terminating instruction with location information,
/// it returns an unknown location.
DebugLoc getStartLoc() const;
/// Return the source code span of the loop.
LocRange getLocRange() const;
StringRef getName() const {
if (BasicBlock *Header = getHeader())
if (Header->hasName())
return Header->getName();
return "<unnamed loop>";
}
private:
Loop() = default;
friend class LoopInfoBase<BasicBlock, Loop>;
friend class LoopBase<BasicBlock, Loop>;
explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {}
~Loop() = default;
};
//===----------------------------------------------------------------------===//
/// This class builds and contains all of the top-level loop
/// structures in the specified function.
///
template <class BlockT, class LoopT> class LoopInfoBase {
// BBMap - Mapping of basic blocks to the innermost loop they occur in
DenseMap<const BlockT *, LoopT *> BBMap;
std::vector<LoopT *> TopLevelLoops;
BumpPtrAllocator LoopAllocator;
friend class LoopBase<BlockT, LoopT>;
friend class LoopInfo;
void operator=(const LoopInfoBase &) = delete;
LoopInfoBase(const LoopInfoBase &) = delete;
public:
LoopInfoBase() {}
~LoopInfoBase() { releaseMemory(); }
LoopInfoBase(LoopInfoBase &&Arg)
: BBMap(std::move(Arg.BBMap)),
TopLevelLoops(std::move(Arg.TopLevelLoops)),
LoopAllocator(std::move(Arg.LoopAllocator)) {
// We have to clear the argument's top-level loops as we've taken ownership.
Arg.TopLevelLoops.clear();
}
LoopInfoBase &operator=(LoopInfoBase &&RHS) {
BBMap = std::move(RHS.BBMap);
for (auto *L : TopLevelLoops)
L->~LoopT();
TopLevelLoops = std::move(RHS.TopLevelLoops);
LoopAllocator = std::move(RHS.LoopAllocator);
RHS.TopLevelLoops.clear();
return *this;
}
void releaseMemory() {
BBMap.clear();
for (auto *L : TopLevelLoops)
L->~LoopT();
TopLevelLoops.clear();
LoopAllocator.Reset();
}
template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&... Args) {
LoopT *Storage = LoopAllocator.Allocate<LoopT>();
return new (Storage) LoopT(std::forward<ArgsTy>(Args)...);
}
/// iterator/begin/end - The interface to the top-level loops in the current
/// function.
///
typedef typename std::vector<LoopT *>::const_iterator iterator;
typedef
typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
iterator begin() const { return TopLevelLoops.begin(); }
iterator end() const { return TopLevelLoops.end(); }
reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in forward program order.
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
SmallVector<LoopT *, 4> getLoopsInPreorder();
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in *reverse* program order.
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
///
/// Also note that this is *not* a reverse preorder. Only the siblings are in
/// reverse program order.
SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
/// Return the innermost loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
/// Same as getLoopFor.
const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); }
/// Return the loop nesting level of the specified block. A depth of 0 means
/// the block is not inside any loop.
unsigned getLoopDepth(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L ? L->getLoopDepth() : 0;
}
// True if the block is a loop header node
bool isLoopHeader(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L && L->getHeader() == BB;
}
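// Typical queries against this mapping, for the `LoopInfo` instantiation (a
// sketch, assuming \c LI is a `LoopInfo &`, \c BB is a `BasicBlock *`, and
// llvm::errs() is in scope):
//
//   if (Loop *L = LI.getLoopFor(BB))
//     errs() << BB->getName() << " is at depth " << LI.getLoopDepth(BB)
//            << (LI.isLoopHeader(BB) ? " (header)\n" : "\n");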
/// Return the top-level loops.
const std::vector<LoopT *> &getTopLevelLoops() const { return TopLevelLoops; }
/// Return the top-level loops.
std::vector<LoopT *> &getTopLevelLoopsVector() { return TopLevelLoops; }
/// This removes the specified top-level loop from this loop info object.
/// The loop is not deleted, as it will presumably be inserted into
/// another loop.
LoopT *removeLoop(iterator I) {
assert(I != end() && "Cannot remove end iterator!");
LoopT *L = *I;
assert(L->isOutermost() && "Not a top-level loop!");
TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
return L;
}
/// Change the top-level loop that contains BB to the specified loop.
/// This should be used by transformations that restructure the loop hierarchy
/// tree.
void changeLoopFor(BlockT *BB, LoopT *L) {
if (!L) {
BBMap.erase(BB);
return;
}
BBMap[BB] = L;
}
/// Replace the specified loop in the top-level loops list with the indicated
/// loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) {
auto I = find(TopLevelLoops, OldLoop);
assert(I != TopLevelLoops.end() && "Old loop not at top level!");
*I = NewLoop;
assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop &&
"Loops already embedded into a subloop!");
}
/// This adds the specified loop to the collection of top-level loops.
void addTopLevelLoop(LoopT *New) {
assert(New->isOutermost() && "Loop already in subloop!");
TopLevelLoops.push_back(New);
}
/// This method completely removes BB from all data structures,
/// including all of the Loop objects it is nested in and our mapping from
/// BasicBlocks to loops.
void removeBlock(BlockT *BB) {
auto I = BBMap.find(BB);
if (I != BBMap.end()) {
for (LoopT *L = I->second; L; L = L->getParentLoop())
L->removeBlockFromLoop(BB);
BBMap.erase(I);
}
}
// Internals
static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
const LoopT *ParentLoop) {
if (!SubLoop)
return true;
if (SubLoop == ParentLoop)
return false;
return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
}
/// Create the loop forest using a stable algorithm.
void analyze(const DominatorTreeBase<BlockT, false> &DomTree);
// Debugging
void print(raw_ostream &OS) const;
void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
/// Destroy a loop that has been removed from the `LoopInfo` nest.
///
/// This runs the destructor of the loop object making it invalid to
/// reference afterward. The memory is retained so that the *pointer* to the
/// loop remains valid.
///
/// The caller is responsible for removing this loop from the loop nest and
/// otherwise disconnecting it from the broader `LoopInfo` data structures.
/// Callers that don't naturally handle this themselves should probably call
/// `erase' instead.
void destroy(LoopT *L) {
L->~LoopT();
// Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons
// \c L, but the pointer remains valid for non-dereferencing uses.
LoopAllocator.Deallocate(L);
}
};
// Implementation in LoopInfoImpl.h
extern template class LoopInfoBase<BasicBlock, Loop>;
class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
typedef LoopInfoBase<BasicBlock, Loop> BaseT;
friend class LoopBase<BasicBlock, Loop>;
void operator=(const LoopInfo &) = delete;
LoopInfo(const LoopInfo &) = delete;
public:
LoopInfo() {}
explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
LoopInfo &operator=(LoopInfo &&RHS) {
BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
return *this;
}
/// Handle invalidation explicitly.
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &);
// Most of the public interface is provided via LoopInfoBase.
/// Update LoopInfo after removing the last backedge from a loop. This updates
/// the loop forest and parent loops for each block so that \c L is no longer
/// referenced, but does not actually delete \c L immediately. The pointer
/// will remain valid until this LoopInfo's memory is released.
void erase(Loop *L);
/// Returns true if replacing From with To everywhere is guaranteed to
/// preserve LCSSA form.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To) {
// Preserving LCSSA form is only problematic if the replacing value is an
// instruction.
Instruction *I = dyn_cast<Instruction>(To);
if (!I)
return true;
// If both instructions are defined in the same basic block then replacement
// cannot break LCSSA form.
if (I->getParent() == From->getParent())
return true;
// If the instruction is not defined in a loop then it can safely replace
// anything.
Loop *ToLoop = getLoopFor(I->getParent());
if (!ToLoop)
return true;
// If the replacing instruction is defined in the same loop as the original
// instruction, or in a loop that contains it as an inner loop, then using
// it as a replacement will not break LCSSA form.
return ToLoop->contains(getLoopFor(From->getParent()));
}
/// Checks if moving a specific instruction can break LCSSA in any loop.
///
/// Return true if moving \p Inst to before \p NewLoc will not break LCSSA,
/// assuming that the function containing \p Inst and \p NewLoc is currently
/// in LCSSA form.
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) {
assert(Inst->getFunction() == NewLoc->getFunction() &&
"Can't reason about IPO!");
auto *OldBB = Inst->getParent();
auto *NewBB = NewLoc->getParent();
// Movement within the same loop does not break LCSSA (the equality check is
// to avoid doing a hashtable lookup in case of intra-block movement).
if (OldBB == NewBB)
return true;
auto *OldLoop = getLoopFor(OldBB);
auto *NewLoop = getLoopFor(NewBB);
if (OldLoop == NewLoop)
return true;
// Check if Outer contains Inner; with the null loop counting as the
// "outermost" loop.
auto Contains = [](const Loop *Outer, const Loop *Inner) {
return !Outer || Outer->contains(Inner);
};
// To check that the movement of Inst to before NewLoc does not break LCSSA,
// we need to check two sets of uses for possible LCSSA violations at
// NewLoc: the users of NewInst, and the operands of NewInst.
// If we know we're hoisting Inst out of an inner loop to an outer loop,
// then the uses *of* Inst don't need to be checked.
if (!Contains(NewLoop, OldLoop)) {
for (Use &U : Inst->uses()) {
auto *UI = cast<Instruction>(U.getUser());
auto *UBB = isa<PHINode>(UI) ? cast<PHINode>(UI)->getIncomingBlock(U)
: UI->getParent();
if (UBB != NewBB && getLoopFor(UBB) != NewLoop)
return false;
}
}
// If we know we're sinking Inst from an outer loop into an inner loop, then
// the *operands* of Inst don't need to be checked.
if (!Contains(OldLoop, NewLoop)) {
// See below on why we can't handle phi nodes here.
if (isa<PHINode>(Inst))
return false;
for (Use &U : Inst->operands()) {
auto *DefI = dyn_cast<Instruction>(U.get());
if (!DefI)
return false;
// This would need adjustment if we allow Inst to be a phi node -- the
// new use block won't simply be NewBB.
auto *DefBlock = DefI->getParent();
if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop)
return false;
}
}
return true;
}
// Return true if a new use of V added in ExitBB would require an LCSSA PHI
// to be inserted at the beginning of the block. Note that V is assumed to
// dominate ExitBB, and ExitBB must be the exit block of some loop. The
// IR is assumed to be in LCSSA form before the planned insertion.
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V,
const BasicBlock *ExitBB) const;
};
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const Loop *> {
typedef const Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
static NodeRef getEntryNode(const Loop *L) { return L; }
static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <> struct GraphTraits<Loop *> {
typedef Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
static NodeRef getEntryNode(Loop *L) { return L; }
static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
/// Analysis pass that exposes the \c LoopInfo for a function.
class LoopAnalysis : public AnalysisInfoMixin<LoopAnalysis> {
friend AnalysisInfoMixin<LoopAnalysis>;
static AnalysisKey Key;
public:
typedef LoopInfo Result;
LoopInfo run(Function &F, FunctionAnalysisManager &AM);
};
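// With the new pass manager the result is fetched from the analysis manager;
// a sketch, assuming \c F is a `Function &`, \c AM is a
// `FunctionAnalysisManager &`, and llvm::errs() is in scope:
//
//   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
//   for (Loop *TopLevel : LI)
//     errs() << "top-level loop: " << TopLevel->getName() << "\n";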
/// Printer pass for the \c LoopAnalysis results.
class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> {
raw_ostream &OS;
public:
explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// Verifier pass for the \c LoopAnalysis results.
struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// The legacy pass manager's analysis pass to compute loop information.
class LoopInfoWrapperPass : public FunctionPass {
LoopInfo LI;
public:
static char ID; // Pass identification, replacement for typeid
LoopInfoWrapperPass();
LoopInfo &getLoopInfo() { return LI; }
const LoopInfo &getLoopInfo() const { return LI; }
/// Calculate the natural loop information for a given function.
bool runOnFunction(Function &F) override;
void verifyAnalysis() const override;
void releaseMemory() override { LI.releaseMemory(); }
void print(raw_ostream &O, const Module *M = nullptr) const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
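// Legacy pass manager clients declare the dependency and then fetch the
// result from the wrapper pass; a sketch of the relevant overrides inside a
// hypothetical FunctionPass:
//
//   void getAnalysisUsage(AnalysisUsage &AU) const override {
//     AU.addRequired<LoopInfoWrapperPass>();
//   }
//   bool runOnFunction(Function &F) override {
//     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
//     errs() << "found " << LI.getTopLevelLoops().size() << " top-level loops\n";
//     return false; // analysis-only: the IR is not modified
//   }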
/// Function to print a loop's contents as LLVM's text IR assembly.
void printLoop(Loop &L, raw_ostream &OS, const std::string &Banner = "");
/// Find and return the loop attribute node for the attribute @p Name in
/// @p LoopID. Return nullptr if there is no such attribute.
MDNode *findOptionMDForLoopID(MDNode *LoopID, StringRef Name);
/// Find string metadata for a loop.
///
/// Returns the MDNode where the first operand is the metadata's name. The
/// following operands are the metadata's values. If no metadata with @p Name is
/// found, return nullptr.
MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
StringRef Name);
/// Returns true if Name is applied to TheLoop and enabled.
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
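// For example (a sketch, assuming \c L is a `const Loop *`):
//
//   if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
//     return false; // the user asked for this loop not to be unrolled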
/// Find named metadata for a loop with an integer value.
llvm::Optional<int>
getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
/// operand or null otherwise. If the string metadata is not found return
/// Optional's not-a-value.
Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
StringRef Name);
/// Look for the loop attribute that requires progress within the loop.
/// Note: Most consumers probably want "isMustProgress" which checks
/// the containing function attribute too.
bool hasMustProgress(const Loop *L);
/// Return true if this loop can be assumed to make progress (i.e. it cannot
/// be an infinite, side-effect-free loop without also invoking undefined
/// behavior).
bool isMustProgress(const Loop *L);
/// Return whether an MDNode might represent an access group.
///
/// Access group metadata nodes have to be distinct and empty. Being
/// always-empty ensures that it never needs to be changed (which -- because
/// MDNodes are designed immutable -- would require creating a new MDNode). Note
/// that this is not a sufficient condition: not every distinct and empty MDNode
/// is representing an access group.
bool isValidAsAccessGroup(MDNode *AccGroup);
/// Create a new LoopID after the loop has been transformed.
///
/// This can be used when no follow-up loop attributes are defined
/// (llvm::makeFollowupLoopID returning None) to stop transformations from
/// being applied again.
///
/// @param Context The LLVMContext in which to create the new LoopID.
/// @param OrigLoopID The original LoopID; can be nullptr if the original
/// loop has no LoopID.
/// @param RemovePrefixes Remove all loop attributes that have these prefixes.
/// Use to remove metadata of the transformation that has
/// been applied.
/// @param AddAttrs Add these loop attributes to the new LoopID.
///
/// @return A new LoopID that can be applied using Loop::setLoopID().
llvm::MDNode *
makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
llvm::ArrayRef<llvm::MDNode *> AddAttrs);
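// For example, dropping any unroll-related attributes after unrolling a loop;
// a sketch, assuming \c L is a `Loop *` and using "llvm.loop.unroll." as an
// illustrative prefix:
//
//   if (MDNode *OrigID = L->getLoopID())
//     L->setLoopID(makePostTransformationMetadata(
//         L->getHeader()->getContext(), OrigID, {"llvm.loop.unroll."}, {}));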
} // End llvm namespace
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 9a749a1c8eae..df10e126c31a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -1,181 +1,181 @@
//===- llvm/Analysis/LoopNestAnalysis.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the interface for the loop nest analysis.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPNESTANALYSIS_H
#define LLVM_ANALYSIS_LOOPNESTANALYSIS_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
namespace llvm {
using LoopVectorTy = SmallVector<Loop *, 8>;
class LPMUpdater;
/// This class represents a loop nest and can be used to query its properties.
-class LoopNest {
+class LLVM_EXTERNAL_VISIBILITY LoopNest {
public:
/// Construct a loop nest rooted by loop \p Root.
LoopNest(Loop &Root, ScalarEvolution &SE);
LoopNest() = delete;
/// Construct a LoopNest object.
static std::unique_ptr<LoopNest> getLoopNest(Loop &Root, ScalarEvolution &SE);
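// For example (a sketch, assuming \c Root is a `Loop &`, \c SE is a
// `ScalarEvolution &`, and llvm::errs() is in scope):
//
//   std::unique_ptr<LoopNest> LN = LoopNest::getLoopNest(Root, SE);
//   if (LN && LN->areAllLoopsSimplifyForm())
//     errs() << "nest " << LN->getName() << " has depth "
//            << LN->getNestDepth() << "\n";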
/// Return true if the given loops \p OuterLoop and \p InnerLoop are
/// perfectly nested with respect to each other, and false otherwise.
/// Example:
/// \code
/// for(i)
/// for(j)
/// for(k)
/// \endcode
/// arePerfectlyNested(loop_i, loop_j, SE) would return true.
/// arePerfectlyNested(loop_j, loop_k, SE) would return true.
/// arePerfectlyNested(loop_i, loop_k, SE) would return false.
static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE);
/// Return the maximum nesting depth of the loop nest rooted by loop \p Root.
/// For example given the loop nest:
/// \code
/// for(i) // loop at level 1 and Root of the nest
/// for(j) // loop at level 2
/// <code>
/// for(k) // loop at level 3
/// \endcode
/// getMaxPerfectDepth(Loop_i) would return 2.
static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE);
/// Recursively traverse all empty 'single successor' basic blocks of \p From
/// (if there are any). When \p CheckUniquePred is set to true, check if
/// each of the empty single successors has a unique predecessor. Return
/// the last basic block found or \p End if it was reached during the search.
static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From,
const BasicBlock *End,
bool CheckUniquePred = false);
/// Return the outermost loop in the loop nest.
Loop &getOutermostLoop() const { return *Loops.front(); }
/// Return the innermost loop in the loop nest if the nest has only one
/// innermost loop, and a nullptr otherwise.
/// Note: the innermost loop returned is not necessarily perfectly nested.
Loop *getInnermostLoop() const {
if (Loops.size() == 1)
return Loops.back();
// The loops in the 'Loops' vector have been collected in breadth first
// order, therefore if the last 2 loops in it have the same nesting depth
// there isn't a unique innermost loop in the nest.
Loop *LastLoop = Loops.back();
auto SecondLastLoopIter = ++Loops.rbegin();
return (LastLoop->getLoopDepth() == (*SecondLastLoopIter)->getLoopDepth())
? nullptr
: LastLoop;
}
/// Return the loop at the given \p Index.
Loop *getLoop(unsigned Index) const {
assert(Index < Loops.size() && "Index is out of bounds");
return Loops[Index];
}
/// Return the number of loops in the nest.
size_t getNumLoops() const { return Loops.size(); }
/// Get the loops in the nest.
ArrayRef<Loop *> getLoops() const { return Loops; }
/// Retrieve a vector of perfect loop nests contained in the current loop
/// nest. For example, given the following nest containing 4 loops, this
/// member function would return {{L1,L2},{L3,L4}}.
/// \code
/// for(i) // L1
/// for(j) // L2
/// <code>
/// for(k) // L3
/// for(l) // L4
/// \endcode
SmallVector<LoopVectorTy, 4> getPerfectLoops(ScalarEvolution &SE) const;
/// Return the loop nest depth (i.e. the loop depth of the 'deepest' loop)
/// For example given the loop nest:
/// \code
/// for(i) // loop at level 1 and Root of the nest
/// for(j1) // loop at level 2
/// for(k) // loop at level 3
/// for(j2) // loop at level 2
/// \endcode
/// getNestDepth() would return 3.
unsigned getNestDepth() const {
int NestDepth =
Loops.back()->getLoopDepth() - Loops.front()->getLoopDepth() + 1;
assert(NestDepth > 0 && "Expecting NestDepth to be at least 1");
return NestDepth;
}
/// Return the maximum perfect nesting depth.
unsigned getMaxPerfectDepth() const { return MaxPerfectDepth; }
/// Return true if all loops in the loop nest are in simplify form.
bool areAllLoopsSimplifyForm() const {
return all_of(Loops, [](const Loop *L) { return L->isLoopSimplifyForm(); });
}
/// Return true if all loops in the loop nest are in rotated form.
bool areAllLoopsRotatedForm() const {
return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); });
}
/// Return the function to which the loop-nest belongs.
Function *getParent() const {
return Loops.front()->getHeader()->getParent();
}
StringRef getName() const { return Loops.front()->getName(); }
protected:
const unsigned MaxPerfectDepth; // maximum perfect nesting depth level.
LoopVectorTy Loops; // the loops in the nest (in breadth first order).
};
raw_ostream &operator<<(raw_ostream &, const LoopNest &);
/// This analysis provides information for a loop nest. The analysis runs on
/// demand and can be initiated via AM.getResult<LoopNestAnalysis>.
class LoopNestAnalysis : public AnalysisInfoMixin<LoopNestAnalysis> {
friend AnalysisInfoMixin<LoopNestAnalysis>;
static AnalysisKey Key;
public:
using Result = LoopNest;
Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
};
/// Printer pass for the \c LoopNest results.
class LoopNestPrinterPass : public PassInfoMixin<LoopNestPrinterPass> {
raw_ostream &OS;
public:
explicit LoopNestPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
} // namespace llvm
#endif // LLVM_ANALYSIS_LOOPNESTANALYSIS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
index da9e00e0e8e1..5ab58ca0646a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1,2394 +1,2393 @@
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
/// information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
namespace llvm {
namespace Intrinsic {
typedef unsigned ID;
}
class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;
template <typename T> class Optional;
/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
/// This is the pointer that the intrinsic is loading from or storing to.
/// If this is non-null, then analysis/optimization passes can assume that
/// this intrinsic is functionally equivalent to a load/store from this
/// pointer.
Value *PtrVal = nullptr;
// Ordering for atomic operations.
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
// Same Id is set by the target for corresponding load/store intrinsics.
unsigned short MatchingId = 0;
bool ReadMem = false;
bool WriteMem = false;
bool IsVolatile = false;
bool isUnordered() const {
return (Ordering == AtomicOrdering::NotAtomic ||
Ordering == AtomicOrdering::Unordered) &&
!IsVolatile;
}
};
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
HardwareLoopInfo() = delete;
HardwareLoopInfo(Loop *L) : L(L) {}
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
const SCEV *ExitCount = nullptr;
- const SCEV *TripCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
bool IsNestingLegal = false; // Can a hardware loop be a parent to
// another hardware loop?
bool CounterInReg = false; // Should loop counter be updated in
// the loop via a phi?
bool PerformEntryTest = false; // Generate the intrinsic which also performs
// icmp ne zero on the loop counter value and
// produces an i1 to guard the loop entry.
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
DominatorTree &DT, bool ForceNestedLoop = false,
bool ForceHardwareLoopPHI = false);
bool canAnalyze(LoopInfo &LI);
};
class IntrinsicCostAttributes {
const IntrinsicInst *II = nullptr;
Type *RetTy = nullptr;
Intrinsic::ID IID;
SmallVector<Type *, 4> ParamTys;
SmallVector<const Value *, 4> Arguments;
FastMathFlags FMF;
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
// arguments and the return value will be computed based on types.
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
public:
IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
InstructionCost ScalarCost = InstructionCost::getInvalid());
IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);
IntrinsicCostAttributes(
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());
Intrinsic::ID getID() const { return IID; }
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
FastMathFlags getFlags() const { return FMF; }
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
bool isTypeBasedOnly() const {
return Arguments.empty();
}
bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
class TargetTransformInfo;
typedef TargetTransformInfo TTI;
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
/// Construct a TTI object using a type implementing the \c Concept
/// API below.
///
/// This is used by targets to construct a TTI wrapping their target-specific
/// implementation that encodes appropriate costs for their target.
template <typename T> TargetTransformInfo(T Impl);
/// Construct a baseline TTI object using a minimal implementation of
/// the \c Concept API below.
///
/// The TTI implementation will reflect the information in the DataLayout
/// provided if non-null.
explicit TargetTransformInfo(const DataLayout &DL);
// Provide move semantics.
TargetTransformInfo(TargetTransformInfo &&Arg);
TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
// We need to define the destructor out-of-line to define our sub-classes
// out-of-line.
~TargetTransformInfo();
/// Handle the invalidation of this information.
///
/// When used as a result of \c TargetIRAnalysis this method will be called
/// when the function this was computed for changes. When it returns false,
/// the information is preserved across those changes.
bool invalidate(Function &, const PreservedAnalyses &,
FunctionAnalysisManager::Invalidator &) {
// FIXME: We should probably in some way ensure that the subtarget
// information for a function hasn't changed.
return false;
}
/// \name Generic Target Information
/// @{
/// The kind of cost model.
///
/// There are several different cost models that can be customized by the
/// target. The normalization of each cost model may be target specific.
enum TargetCostKind {
TCK_RecipThroughput, ///< Reciprocal throughput.
TCK_Latency, ///< The latency of instruction.
TCK_CodeSize, ///< Instruction code size.
TCK_SizeAndLatency ///< The weighted sum of size and latency.
};
/// Query the cost of a specified instruction.
///
/// Clients should use this interface to query the cost of an existing
/// instruction. The instruction must have a valid parent (basic block).
///
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
InstructionCost getInstructionCost(const Instruction *I,
enum TargetCostKind kind) const {
InstructionCost Cost;
switch (kind) {
case TCK_RecipThroughput:
Cost = getInstructionThroughput(I);
break;
case TCK_Latency:
Cost = getInstructionLatency(I);
break;
case TCK_CodeSize:
case TCK_SizeAndLatency:
Cost = getUserCost(I, kind);
break;
}
return Cost;
}
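// For example (a sketch, assuming \c TTI is a `const TargetTransformInfo &`,
// \c I is an `Instruction *` with a parent basic block, and llvm::errs() is
// in scope):
//
//   InstructionCost Cost =
//       TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
//   if (Cost.isValid())
//     errs() << "estimated cost: " << *Cost.getValue() << "\n";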
/// Underlying constants for 'cost' values in this interface.
///
/// Many APIs in this interface return a cost. This enum defines the
/// fundamental values that should be used to interpret (and produce) those
/// costs. The costs are returned as an int rather than a member of this
/// enumeration because it is expected that the cost of one IR instruction
/// may have a multiplicative factor to it or otherwise won't fit directly
/// into the enum. Moreover, it is common to sum or average costs which works
/// better as simple integral values. Thus this enum only provides constants.
/// Also note that the returned costs are signed integers to make it natural
/// to add, subtract, and test with zero (a common boundary condition). It is
/// not expected that 2^32 is a realistic cost to be modeling at any point.
///
/// Note that these costs should usually reflect the intersection of code-size
/// cost and execution cost. A free instruction is typically one that folds
/// into another instruction. For example, reg-to-reg moves can often be
/// skipped by renaming the registers in the CPU, but they still are encoded
/// and thus wouldn't be considered 'free' here.
enum TargetCostConstants {
TCC_Free = 0, ///< Expected to fold away in lowering.
TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};
/// Estimate the cost of a GEP operation when lowered.
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
/// targets where calls are unusually expensive.
///
/// TODO: This is a rather blunt instrument. Perhaps altering the costs of
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
/// \returns A value to be added to the inlining threshold.
unsigned adjustInliningThreshold(const CallBase *CB) const;
/// \returns Vector bonus in percent.
///
/// Vector bonuses: We want to more aggressively inline vector-dense kernels
/// and apply this bonus based on the percentage of vector instructions. A
/// bonus is applied if the vector instructions exceed 50% and half that
/// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
/// arbitrary and evolved over time by accident as much as because they are
/// principled bonuses.
/// FIXME: It would be nice to base the bonus values on something more
/// scientific. A target may have no bonus on vector instructions.
int getInlinerVectorBonusPercent() const;
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
InstructionCost getMemcpyCost(const Instruction *I) const;
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const;
/// Estimate the cost of a given IR user when lowered.
///
/// This can estimate the cost of either a ConstantExpr or Instruction when
/// lowered.
///
/// \p Operands is a list of operands which can be a result of transformations
/// of the current operands. The number of operands on the list must equal the
/// number of operands the IR user currently has, and their order must match
/// the order of the user's current operands.
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
TargetCostKind CostKind) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
SmallVector<const Value *, 4> Operands(U->operand_values());
return getUserCost(U, Operands, CostKind);
}
/// If a branch or a select condition is skewed in one direction by more than
/// this factor, it is very likely to be predicted correctly.
BranchProbability getPredictableBranchThreshold() const;
/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
/// when threads in the same wavefront take different paths due to conditional
/// branches.
bool hasBranchDivergence() const;
/// Return true if the target prefers to use GPU divergence analysis to
/// replace the legacy version.
bool useGPUDivergenceAnalysis() const;
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
/// first builds the dependency graph, and then runs the reachability
/// algorithm starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
// Returns true for the target specific
// set of operations which produce uniform result
// even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
/// Returns the address space ID for a target's 'flat' address space. Note
/// this is not necessarily the same as addrspace(0), which LLVM sometimes
/// refers to as the generic address space. The flat address space is a
/// generic address space that can be used to access multiple segments of memory
/// with different address spaces. Access of a memory location through a
/// pointer with this address space is expected to be legal but slower
/// compared to the same memory location accessed through a pointer with a
/// different address space.
///
/// This is for targets with different pointer representations which can
/// be converted with the addrspacecast instruction. If a pointer is converted
/// to this address space, optimizations should attempt to replace the access
/// with the source address space.
///
/// \returns ~0u if the target does not have such a flat address space to
/// optimize away.
unsigned getFlatAddressSpace() const;
/// Return any intrinsic address operand indexes which may be rewritten if
/// they use a flat address space pointer.
///
/// \returns true if the intrinsic was handled.
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
unsigned getAssumedAddrSpace(const Value *V) const;
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
/// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
/// new value (which may be the original \p II with modified operands).
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
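// Illustrative sketch of the intended flow (names such as OldPtr/NewPtr are
// placeholders, not part of this interface; assumes TTI and an
// IntrinsicInst *II are in scope):
//
//   SmallVector<int, 4> OpIndexes;
//   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
//     if (Value *Rewritten =
//             TTI.rewriteIntrinsicWithAddressSpace(II, OldPtr, NewPtr))
//       ...; // use Rewritten in place of II
//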
/// Test whether calls to a function lower to actual program function
/// calls.
///
/// The idea is to test whether the program is likely to require a 'call'
/// instruction or equivalent in order to call the given function.
///
/// FIXME: It's not clear that this is a good or useful query API. Clients
/// should probably move to simpler cost metrics using the above.
/// Alternatively, we could split the cost interface into distinct code-size
/// and execution-speed costs. This would allow modelling the core of this
/// query more accurately as a call is a single small instruction, but
/// incurs significant execution cost.
bool isLoweredToCall(const Function *F) const;
struct LSRCost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
unsigned Insns;
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
unsigned ScaleCost;
};
/// Parameters that control the generic loop unrolling transformation.
struct UnrollingPreferences {
/// The cost threshold for the unrolled loop. Should be relative to the
/// getUserCost values returned by this API, and the expectation is that
/// the unrolled loop's instructions when run through that interface should
/// not exceed this cost. However, this is only an estimate. Also, specific
/// loops may be unrolled even with a cost above this threshold if deemed
/// profitable. Set this to UINT_MAX to disable the loop body cost
/// restriction.
unsigned Threshold;
/// If complete unrolling will reduce the cost of the loop, we will boost
/// the Threshold by a certain percent to allow more aggressive complete
/// unrolling. This value provides the maximum boost percentage that we
/// can apply to Threshold (The value should be no less than 100).
/// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
/// MaxPercentThresholdBoost / 100)
/// E.g. if complete unrolling reduces the loop execution time by 50%
/// then we boost the threshold by the factor of 2x. If unrolling is not
/// expected to reduce the running time, then we do not increase the
/// threshold.
unsigned MaxPercentThresholdBoost;
/// The cost threshold for the unrolled loop when optimizing for size (set
/// to UINT_MAX to disable).
unsigned OptSizeThreshold;
/// The cost threshold for the unrolled loop, like Threshold, but used
/// for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned PartialThreshold;
/// The cost threshold for the unrolled loop when optimizing for size, like
/// OptSizeThreshold, but used for partial/runtime unrolling (set to
/// UINT_MAX to disable).
unsigned PartialOptSizeThreshold;
/// A forced unrolling factor (the number of concatenated bodies of the
/// original loop in the unrolled loop body). When set to 0, the unrolling
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
/// Set the maximum unrolling factor. The unrolling factor may be selected
/// using the appropriate cost threshold, but may not exceed this number
/// (set to UINT_MAX to disable). This does not apply in cases where the
/// loop is being fully unrolled.
unsigned MaxCount;
/// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
unsigned FullUnrollMaxCount;
/// The number of instructions optimized when the "back edge"
/// becomes a "fall through" in the unrolled loop.
/// For now we count a conditional branch on a backedge and a comparison
/// feeding it.
unsigned BEInsns;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
/// Allow runtime unrolling (unrolling of loops to expand the size of the
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
/// Allow generation of a loop remainder (extra iterations after unroll).
bool AllowRemainder;
/// Allow emitting expensive instructions (such as divisions) when computing
/// the trip count of a loop for runtime unrolling.
bool AllowExpensiveTripCount;
/// Apply loop unroll on any kind of loop
/// (mainly to loops that fail runtime unrolling).
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
/// loop.
unsigned UnrollAndJamInnerLoopThreshold;
/// Don't allow loop unrolling to simulate more than this number of
/// iterations when checking full unroll profitability
unsigned MaxIterationsCountToAnalyze;
};
/// Get target-customized preferences for the generic loop unrolling
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
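// A minimal sketch of a target-side override (MyTTIImpl is a hypothetical
// target implementation and the values are arbitrary, for illustration only):
//
//   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
//                                           TTI::UnrollingPreferences &UP) {
//     UP.Partial = true;          // allow partial unrolling
//     UP.Runtime = true;          // allow runtime unrolling
//     UP.MaxCount = 4;            // cap the unroll factor at 4
//     UP.PartialThreshold = 200;  // loosen the partial-unroll size budget
//   }
//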
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;
/// Query the target whether it would be preferred to create a predicated
/// vector loop, which can avoid the need to emit a scalar epilogue loop.
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) const;
/// Query the target whether lowering of the llvm.get.active.lane.mask
/// intrinsic is supported.
bool emitGetActiveLaneMask() const;
/// Parameters that control the loop peeling transformation.
struct PeelingPreferences {
/// A forced peeling factor (the number of bodies of the original loop
/// that should be peeled off before the loop body). When set to 0, a
/// peeling factor based on profile information and other factors is used.
unsigned PeelCount;
/// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow peeling off loop iterations for loop nests.
bool AllowLoopNestsPeeling;
/// Allow peeling based on profile. Used to enable peeling off all
/// iterations based on the provided profile.
/// If the value is true, the peeling cost model can decide to peel only
/// some iterations, in which case it will set this to false.
bool PeelProfiledIterations;
};
/// Get target-customized preferences for the generic loop peeling
/// transformation. The caller will initialize \p PP with the current
/// target-independent defaults with information from \p L and \p SE.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
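// A minimal sketch of a target-side override (MyTTIImpl is hypothetical;
// the values are illustrative only):
//
//   void MyTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
//                                         TTI::PeelingPreferences &PP) {
//     PP.PeelCount = 0;        // let the cost model pick a peel count
//     PP.AllowPeeling = true;  // permit peeling in general
//   }
//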
/// Targets can implement their own combinations for target-specific
/// intrinsics. This function will be called from the InstCombine pass every
/// time a target-specific intrinsic is encountered.
///
/// \returns None to not do anything target specific, or a value that will be
/// returned from the InstCombiner. Returning nullptr stops further
/// processing of the intrinsic.
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
/// Can be used to implement target-specific instruction combining.
/// \see instCombineIntrinsic
Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const;
/// Can be used to implement target-specific instruction combining.
/// \see instCombineIntrinsic
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const;
/// @}
/// \name Scalar Target Information
/// @{
/// Flags indicating the kind of support for population count.
///
/// Compared to the SW implementation, HW support is supposed to
/// significantly boost the performance when the population is dense, and it
/// may or may not degrade performance if the population is sparse. HW
/// support is considered "Fast" if it can outperform, or is on a par
/// with, the SW implementation when the population is sparse; otherwise,
/// it is considered "Slow".
enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
/// Return true if the specified immediate is a legal add immediate, that
/// is the target has add instructions which can add a register with the
/// immediate without having to materialize the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const;
/// Return true if the specified immediate is a legal icmp immediate,
/// that is the target has icmp instructions which can compare a register
/// against the immediate without having to materialize the immediate into a
/// register.
bool isLegalICmpImmediate(int64_t Imm) const;
/// Return true if the addressing mode represented by AM is legal for
/// this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// If target returns true in LSRWithInstrQueries(), I may be valid.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0,
Instruction *I = nullptr) const;
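// Illustrative query (assumes TTI and an LLVMContext &Ctx are in scope): ask
// whether an address of the form "base + 4*index + 16" is legal for an i32
// load/store in address space 0.
//
//   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/nullptr,
//                                          /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true,
//                                          /*Scale=*/4);
//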
/// Return true if the LSR cost of C1 is lower than that of C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
/// Return true if LSR major cost is number of registers. Targets which
/// implement their own isLSRCostLess and unset number of registers as major
/// cost should return false, otherwise return true.
bool isNumRegsMajorCostOfLSR() const;
/// \returns true if LSR should not optimize a chain that includes \p I.
bool isProfitableLSRChainElement(Instruction *I) const;
/// Return true if the target can fuse a compare and branch.
/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
/// Return true if the target can save a compare for loop count, for example
/// a hardware loop saves the compare.
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const;
enum AddressingModeKind {
AMK_PreIndexed,
AMK_PostIndexed,
AMK_None
};
/// Return the preferred addressing mode LSR should make efforts to generate.
AddressingModeKind getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const;
/// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked load.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal store.
bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
/// can enable more aggressive transformations for division and remainder than
/// would typically be allowed using throughput or size cost models.
bool hasDivRemOp(Type *DataType, bool IsSigned) const;
/// Return true if the given instruction (assumed to be a memory access
/// instruction) has a volatile variant. If that's the case then we can avoid
/// addrspacecast to generic AS for volatile loads/stores. Default
/// implementation returns false, which prevents address space inference for
/// volatile loads/stores.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
/// Return true if the target doesn't mind addresses in vectors.
bool prefersVectorizedAddressing() const;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
/// TODO: Handle pre/postinc as well.
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace = 0) const;
/// Return true if the loop strength reduce pass should make
/// Instruction* based TTI queries to isLegalAddressingMode(). This is
/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
/// immediate offset and no index register.
bool LSRWithInstrQueries() const;
/// Return true if it's free to truncate a value of type Ty1 to type
/// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
/// by referencing its sub-register AX.
bool isTruncateFree(Type *Ty1, Type *Ty2) const;
/// Return true if it is profitable to hoist an instruction from the
/// then/else blocks to before the if.
bool isProfitableToHoist(Instruction *I) const;
bool useAA() const;
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
/// Returns the estimated number of registers required to represent \p Ty.
InstructionCost getRegUsageForType(Type *Ty) const;
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
/// Return true if switches should be turned into lookup tables
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
/// Return true if lookup tables should be turned into relative lookup tables.
bool shouldBuildRelLookupTables() const;
/// Return true if the input function, which is cold at all call sites,
/// should use the coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) const;
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) const;
/// If target has efficient vector element load/store instructions, it can
/// return true here so that insertion/extraction costs are not added to
/// the scalarization cost of a load/store.
bool supportsEfficientVectorElementLoadStore() const;
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
/// Returns options for expansion of memcmp. IsZeroCmp is
/// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
// Return true if memcmp expansion is enabled.
operator bool() const { return MaxNumLoads > 0; }
// Maximum number of load operations.
unsigned MaxNumLoads = 0;
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
// For memcmp expansion when the memcmp result is only compared equal or
// not-equal to 0, allow up to this number of load pairs per block. As an
// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
// a0 = load2bytes &a[0]
// b0 = load2bytes &b[0]
// a2 = load1byte &a[2]
// b2 = load1byte &b[2]
// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
unsigned NumLoadsPerBlock = 1;
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
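// A minimal target-side sketch (MyTTIImpl and the values are illustrative
// assumptions, not a recommendation): allow up to 4 loads, using 8/4/2/1-byte
// accesses.
//
//   TTI::MemCmpExpansionOptions
//   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
//     TTI::MemCmpExpansionOptions Options;
//     Options.MaxNumLoads = 4;
//     Options.LoadSizes.append({8, 4, 2, 1}); // decreasing order, in bytes
//     if (IsZeroCmp)
//       Options.NumLoadsPerBlock = 2; // allow load pairs for ==0 compares
//     return Options;
//   }
//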
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
/// Enable matching of interleaved access groups that contain predicated
/// accesses or gaps and therefore vectorized using masked
/// vector loads/stores.
bool enableMaskedInterleavedAccessVectorization() const;
/// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because vector and scalar floating-point
/// semantics may differ. For example, ARM NEON v7 SIMD math
/// does not support IEEE-754 denormal numbers, while depending on the
/// platform, scalar floating-point math does.
/// This applies to floating-point math operations and calls, not memory
/// operations, shuffles, or casts.
bool isFPVectorizationPotentiallyUnsafe() const;
/// Determine if the target supports unaligned memory accesses.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,
Align Alignment = Align(1),
bool *Fast = nullptr) const;
/// Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
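// Illustrative check (assumes TTI is in scope): prefer a popcount-based
// sequence only when hardware support is fast for the given width.
//
//   if (TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware)
//     ...; // emit a ctpop-based sequence
//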
/// Return true if the hardware has a fast square-root instruction.
bool haveFastSqrt(Type *Ty) const;
/// Return true if it is faster to check if a floating-point value is NaN
/// (or not-NaN) versus a comparison against a constant FP zero value.
/// Targets should override this if materializing a 0.0 for comparison is
/// generally as cheap as checking for ordered/unordered.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
/// Return the expected cost of supporting the floating point operation
/// of the specified type.
InstructionCost getFPOpCost(Type *Ty) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) const;
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
/// functions in that it is subtarget agnostic. This is useful when you e.g.
/// target one ISA such as AArch32 but smaller encodings could be possible
/// with another such as Thumb. This return value is used as a penalty when
/// the total cost for a constant is calculated (the bigger the cost, the
/// more beneficial constant hoisting is).
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) const;
/// @}
/// \name Vector Target Information
/// @{
/// The various kinds of shuffle patterns for vector queries.
enum ShuffleKind {
SK_Broadcast, ///< Broadcast element 0 to all other elements.
SK_Reverse, ///< Reverse the order of the vector.
SK_Select, ///< Selects elements from the corresponding lane of
///< either source operand. This is equivalent to a
///< vector select with a constant condition operand.
SK_Transpose, ///< Transpose two vectors.
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
///< with any shuffle mask.
SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
///< shuffle mask.
SK_Splice ///< Concatenates elements from the first input vector
///< with elements of the second input vector, returning
///< a vector of the same type as the input vectors.
};
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
OK_UniformValue, // Operand is uniform (splat of a value).
OK_UniformConstantValue, // Operand is uniform constant.
OK_NonUniformConstantValue // Operand is a non-uniform constant value.
};
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
/// \return the number of registers in the target-provided register class.
unsigned getNumberOfRegisters(unsigned ClassID) const;
/// \return the target-provided register class ID for the provided type,
/// accounting for type promotion and other type-legalization techniques that
/// the target might apply. However, it specifically does not account for the
/// scalarization or splitting of vector types. Should a vector type require
/// scalarization or splitting into multiple underlying vector registers, that
/// type should be mapped to a register class containing no registers.
/// Specifically, this is designed to provide a simple, high-level view of the
/// register allocation later performed by the backend. These register classes
/// don't necessarily map onto the register classes used by the backend.
/// FIXME: It's not currently possible to determine how many registers
/// are used by the provided type.
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
/// \return the target-provided register class name
const char *getRegisterClassName(unsigned ClassID) const;
enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
/// \return The width of the largest scalar or vector register type.
TypeSize getRegisterBitWidth(RegisterKind K) const;
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
/// \return The maximum value of vscale if the target specifies an
/// architectural maximum vector length, and None otherwise.
Optional<unsigned> getMaxVScale() const;
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
/// creating vectors that span multiple vector registers.
/// If false, the vectorization factor will be chosen based on the
/// size of the widest element type.
bool shouldMaximizeVectorBandwidth() const;
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
/// If IsScalable is true, the returned ElementCount must be a scalable VF.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
/// \return The maximum vectorization factor for types of given element
/// bit width and opcode, or 0 if there is no maximum VF.
/// Currently only used by the SLP vectorizer.
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
/// The possible cache levels
enum class CacheLevel {
L1D, // The L1 data cache
L2D, // The L2 data cache
// We currently do not model L3 caches, as their sizes differ widely between
// microarchitectures. Also, we currently do not have a use for L3 cache
// size modeling yet.
};
/// \return The size of the cache level in bytes, if available.
Optional<unsigned> getCacheSize(CacheLevel Level) const;
/// \return The associativity of the cache level, if available.
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of
/// instructions.
unsigned getPrefetchDistance() const;
/// Some HW prefetchers can handle accesses up to a certain constant stride.
/// Sometimes prefetching is beneficial even below the HW prefetcher limit,
/// and the arguments provided are meant to serve as a basis for deciding this
/// for a particular loop.
///
/// \param NumMemAccesses Number of memory accesses in the loop.
/// \param NumStridedMemAccesses Number of the memory accesses that
/// ScalarEvolution could find a known stride
/// for.
/// \param NumPrefetches Number of software prefetches that will be
/// emitted as determined by the addresses
/// involved and the cache line size.
/// \param HasCall True if the loop contains a call.
///
/// \return This is the minimum stride in bytes where it makes sense to start
/// adding SW prefetches. The default is 1, i.e. prefetch with any
/// stride.
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const;
/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no
/// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return True if prefetching should also be done for writes.
bool enableWritePrefetching() const;
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueKind getOperandInfo(const Value *V,
OperandValueProperties &OpProps);
/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
/// From Agner Fog's guides, reciprocal throughput is "the average number of
/// clock cycles per instruction when the instructions are not part of a
/// limiting dependency chain."
/// Therefore, costs should be scaled to account for multiple execution units
/// on the target that can process this type of instruction. For example, if
/// there are 5 scalar integer units and 2 vector integer units that can
/// calculate an 'add' in a single cycle, this model should indicate that the
/// cost of the vector add instruction is 2.5 times the cost of the scalar
/// add instruction.
/// \p Args is an optional argument which holds the instruction operands
/// values so the TTI can analyze those values searching for special
/// cases or optimizations based on those values.
/// \p CxtI is the optional original context instruction, if one exists, to
/// provide even more information.
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr) const;
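// Illustrative sketch (assumes TTI and a BinaryOperator *BO are in scope):
// derive operand info first, then feed it into the arithmetic cost query.
//
//   TargetTransformInfo::OperandValueProperties P1, P2;
//   TargetTransformInfo::OperandValueKind K1 =
//       TargetTransformInfo::getOperandInfo(BO->getOperand(0), P1);
//   TargetTransformInfo::OperandValueKind K2 =
//       TargetTransformInfo::getOperandInfo(BO->getOperand(1), P2);
//   InstructionCost Cost = TTI.getArithmeticInstrCost(
//       BO->getOpcode(), BO->getType(),
//       TargetTransformInfo::TCK_RecipThroughput, K1, K2, P1, P2);
//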
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask = None, int Index = 0,
VectorType *SubTp = nullptr) const;
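// Illustrative query (assumes TTI and a VectorType *VecTy are in scope):
// the cost of broadcasting lane 0 across the vector.
//
//   InstructionCost Cost =
//       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
//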
/// Represents a hint about the context in which a cast is used.
///
/// For zext/sext, the context of the cast is the operand, which must be a
/// load of some kind. For trunc, the context of the cast is the single
/// user of the instruction, which must be a store of some kind.
///
/// This enum allows the vectorizer to give getCastInstrCost an idea of the
/// type of cast it's dealing with, as not every cast is equal. For instance,
/// the zext of a load may be free, but the zext of an interleaving load can
/// be (very) expensive!
///
/// See \c getCastContextHint to compute a CastContextHint from a cast
/// Instruction*. Callers can use it if they don't need to override the
/// context and just want it to be calculated from the instruction.
///
/// FIXME: This handles the types of load/store that the vectorizer can
/// produce, which are the cases where the context instruction is most
/// likely to be incorrect. There are other situations where that can happen
/// too, which might be handled here but in the long run a more general
/// solution of costing multiple instructions at the same time may be better.
enum class CastContextHint : uint8_t {
None, ///< The cast is not used with a load/store of any kind.
Normal, ///< The cast is used with a normal load/store.
Masked, ///< The cast is used with a masked load/store.
GatherScatter, ///< The cast is used with a gather/scatter.
Interleave, ///< The cast is used with an interleaved load/store.
Reversed, ///< The cast is used with a reversed load/store.
};
/// Calculates a CastContextHint from \p I.
/// This should be used by callers of getCastInstrCost if they wish to
/// determine the context from some instruction.
/// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
/// or if it's another type of cast.
static CastContextHint getCastContextHint(const Instruction *I);
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
InstructionCost
getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
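// Illustrative sketch (assumes TTI and a CastInst *CI are in scope): compute
// the context hint from the instruction and pass it along with the cast.
//
//   TargetTransformInfo::CastContextHint CCH =
//       TargetTransformInfo::getCastContextHint(CI);
//   InstructionCost Cost = TTI.getCastInstrCost(
//       CI->getOpcode(), CI->getDestTy(), CI->getSrcTy(), CCH,
//       TargetTransformInfo::TCK_RecipThroughput, CI);
//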
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index = -1) const;
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br, Switch.
InstructionCost
getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
/// 'I' parameter. The \p VecPred parameter can be used to indicate the select
/// is using a compare with the specified predicate as condition. When vector
/// types are passed, \p VecPred must be used for all lanes.
InstructionCost
getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \return The cost of Gather or Scatter operation
/// \p Opcode - is a type of memory access Load or Store
/// \p DataTy - a vector type of the data to be loaded or stored
/// \p Ptr - pointer [or vector of pointers] - address[es] in memory
/// \p VariableMask - true when the memory access is predicated with a mask
/// that is not a compile-time constant
/// \p Alignment - alignment of single element
/// \p I - the optional original context instruction, if one exists, e.g. the
/// load/store to transform or the call to the gather/scatter intrinsic
InstructionCost getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
/// \p Factor is the interleave factor
/// \p Indices is the indices for interleaved load members (as interleaved
/// load allows gaps)
/// \p Alignment is the alignment of the memory operation
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// A helper function to determine the type of reduction algorithm used
/// for a given \p Opcode and set of FastMathFlags \p FMF.
static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
return FMF != None && !(*FMF).allowReassoc();
}
/// Calculate the cost of vector reduction intrinsics.
///
/// This is the cost of reducing the vector value of type \p Ty to a scalar
/// value using the operation denoted by \p Opcode. The FastMathFlags
/// parameter \p FMF indicates what type of reduction we are performing:
/// 1. Tree-wise. This is the typical 'fast' reduction performed that
/// involves successively splitting a vector into half and doing the
/// operation on the pair of halves until you have a scalar value. For
/// example:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
/// ((v0+v2+v1+v3), undef, undef, undef)
/// This is the default behaviour for integer operations, whereas for
/// floating point we only do this if \p FMF indicates that
/// reassociation is allowed.
/// 2. Ordered. For a vector with N elements this involves performing N
/// operations in lane order, starting with an initial scalar value, i.e.
/// result = InitVal + v0
/// result = result + v1
/// result = result + v2
/// result = result + v3
/// This is only the case for FP operations and when reassociation is not
/// allowed.
///
InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
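// Illustrative sketch (assumes TTI, a VectorType *VTy, and FastMathFlags FMF
// are in scope): query the reduction cost; requiresOrderedReduction tells
// whether the ordered (in-lane-order) form will be costed.
//
//   bool Ordered = TargetTransformInfo::requiresOrderedReduction(FMF);
//   InstructionCost Cost = TTI.getArithmeticReductionCost(
//       Instruction::FAdd, VTy, FMF,
//       TargetTransformInfo::TCK_RecipThroughput);
//   (void)Ordered; // e.g. pick a different strategy for ordered reductions
//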
InstructionCost getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
/// getArithmeticReductionCost of an Add reduction with an extension and
/// optional multiply. This is the cost of:
/// ResTy vecreduce.add(ext(Ty A)), or if the IsMLA flag is set then:
/// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
/// on a VectorType with ResTy elements and Ty lanes.
InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
/// 3. scalar instruction which is to be vectorized.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const;
/// \returns The cost of Call instructions.
InstructionCost getCallInstrCost(
Function *F, Type *RetTy, ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
unsigned getNumberOfParts(Type *Tp) const;
/// \returns The cost of the address computation. For most targets this can be
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
/// The 'SE' parameter holds a pointer to the scalar evolution object, which
/// is used to get the step value of 'Ptr' in the case of a constant stride.
/// The 'Ptr' parameter holds SCEV of the access pointer.
InstructionCost getAddressComputationCost(Type *Ty,
ScalarEvolution *SE = nullptr,
const SCEV *Ptr = nullptr) const;
/// \returns The cost, if any, of keeping values of the given types alive
/// over a callsite.
///
/// Some types may require the use of register classes that do not have
/// any callee-saved registers, so would require a spill and fill.
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
/// \returns True if the intrinsic is a supported memory intrinsic. Info
/// will contain additional information - whether the intrinsic may read
/// or write memory, its volatility, and the pointer. Info is undefined
/// if false is returned.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
/// \returns The maximum element size, in bytes, for an element
/// unordered-atomic memory intrinsic.
unsigned getAtomicMemIntrinsicMaxElementSize() const;
/// \returns A value which is the result of the given memory intrinsic. New
/// instructions may be created to extract the result from the given intrinsic
/// memory operation. Returns nullptr if the target cannot create a result
/// from the given intrinsic.
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) const;
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
/// \param RemainingBytes The number of bytes to copy.
///
/// Calculates the operand types to use when copying \p RemainingBytes of
/// memory, where source and destination alignments are \p SrcAlign and
/// \p DestAlign respectively.
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const;
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
/// \returns True if the caller and callee agree on how \p Args will be passed
/// to the callee.
/// \param[out] Args The list of compatible arguments. The implementation may
/// filter out any incompatible args from this list.
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
/// The type of load/store indexing.
enum MemIndexedMode {
MIM_Unindexed, ///< No indexing.
MIM_PreInc, ///< Pre-incrementing.
MIM_PreDec, ///< Pre-decrementing.
MIM_PostInc, ///< Post-incrementing.
MIM_PostDec ///< Post-decrementing.
};
/// \returns True if the specified indexed load for the given type is legal.
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns True if the specified indexed store for the given type is legal.
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
/// \returns The bitwidth of the largest vector type that should be used to
/// load/store in the given address space.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
/// \returns True if the load instruction is legal to vectorize.
bool isLegalToVectorizeLoad(LoadInst *LI) const;
/// \returns True if the store instruction is legal to vectorize.
bool isLegalToVectorizeStore(StoreInst *SI) const;
/// \returns True if it is legal to vectorize the given load chain.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given store chain.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given reduction kind.
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
/// \returns True if the given type is supported for scalable vectors
bool isElementTypeLegalForScalableVector(Type *Ty) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes stores or has a better vector factor.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
/// Flags describing the kind of vector reduction.
struct ReductionFlags {
ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
bool IsSigned; ///< Whether the operation is a signed int reduction.
bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
};
/// \returns True if the target prefers in-loop reductions.
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
/// \returns True if the target prefers the reduction select to be kept in the
/// loop when tail folding, i.e.
/// loop:
/// p = phi (0, s)
/// a = add (p, x)
/// s = select (mask, a, p)
/// vecreduce.add(s)
///
/// As opposed to the normal scheme of p = phi (0, a) which allows the select
/// to be pulled out of the loop. If the select(.., add, ..) can be predicated
/// by the target, this can lead to cleaner code generation.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
/// \returns the size cost of rematerializing a GlobalValue address relative
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
/// \returns True if the target supports scalable vectors.
bool supportsScalableVectors() const;
/// \name Vector Predication Information
/// @{
/// Whether the target supports the %evl parameter of VP intrinsics efficiently
/// in hardware (see LLVM Language Reference - "Vector Predication
/// Intrinsics"). Use of %evl is discouraged when that is not the case.
bool hasActiveVectorLength() const;
struct VPLegalization {
enum VPTransform {
// keep the predicating parameter
Legal = 0,
// where legal, discard the predicate parameter
Discard = 1,
// transform into something else that is also predicating
Convert = 2
};
// How to transform the EVL parameter.
// Legal: keep the EVL parameter as it is.
// Discard: Ignore the EVL parameter where it is safe to do so.
// Convert: Fold the EVL into the mask parameter.
VPTransform EVLParamStrategy;
// How to transform the operator.
// Legal: The target supports this operator.
// Convert: Convert this to a non-VP operation.
// The 'Discard' strategy is invalid.
VPTransform OpStrategy;
bool shouldDoNothing() const {
return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
}
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
: EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
};
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
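// Illustrative sketch (assumes TTI and a VPIntrinsic &VPI are in scope):
//
//   TargetTransformInfo::VPLegalization VPL =
//       TTI.getVPLegalizationStrategy(VPI);
//   if (!VPL.shouldDoNothing())
//     ...; // expand or convert the VP intrinsic as the strategy requires
//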
/// @}
/// @}
private:
/// Estimate the latency of specified instruction.
/// Returns 1 as the default value.
InstructionCost getInstructionLatency(const Instruction *I) const;
/// Returns the expected throughput cost of the instruction.
/// Returns -1 if the cost is unknown.
InstructionCost getInstructionThroughput(const Instruction *I) const;
/// The abstract base class used to type erase specific TTI
/// implementations.
class Concept;
/// The template model for the base class which wraps a concrete
/// implementation in a type erased interface.
template <typename T> class Model;
std::unique_ptr<Concept> TTIImpl;
};
class TargetTransformInfo::Concept {
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() = 0;
virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) = 0;
virtual InstructionCost getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool useGPUDivergenceAnalysis() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool
preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
virtual bool emitGetActiveLaneMask() = 0;
virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) = 0;
virtual Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) = 0;
virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace,
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isNumRegsMajorCostOfLSR() = 0;
virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;
virtual AddressingModeKind
getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) = 0;
virtual bool LSRWithInstrQueries() = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
virtual InstructionCost getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool shouldBuildRelLookupTables() = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert,
bool Extract) = 0;
virtual InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual InstructionCost getFPOpCost(Type *Ty) = 0;
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) = 0;
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) = 0;
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
virtual unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const = 0;
virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
virtual bool shouldMaximizeVectorBandwidth() const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const = 0;
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() const = 0;
virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
/// \return How much before a load we should place the prefetch
/// instruction. This is currently measured in number of
/// instructions.
virtual unsigned getPrefetchDistance() const = 0;
/// \return Some HW prefetchers can handle accesses up to a certain
/// constant stride. This is the minimum stride in bytes where it
/// makes sense to start adding SW prefetches. The default is 1,
/// i.e. prefetch with any stride. Sometimes prefetching is beneficial
/// even below the HW prefetcher limit, and the arguments provided are
/// meant to serve as a basis for deciding this for a particular loop.
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches,
bool HasCall) const = 0;
/// \return The maximum number of iterations to prefetch ahead. If
/// the required number of iterations is more than this number, no
/// prefetching is performed.
virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
/// \return True if prefetching should also be done for writes.
virtual bool enableWritePrefetching() const = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index) = 0;
virtual InstructionCost getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
virtual InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
virtual InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
virtual InstructionCost
getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
virtual InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) = 0;
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool
areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
Align Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
Align Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const = 0;
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength() const = 0;
virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
};
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
T Impl;
public:
Model(T Impl) : Impl(std::move(Impl)) {}
~Model() override {}
const DataLayout &getDataLayout() const override {
return Impl.getDataLayout();
}
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
enum TargetTransformInfo::TargetCostKind CostKind) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
unsigned adjustInliningThreshold(const CallBase *CB) override {
return Impl.adjustInliningThreshold(CB);
}
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
InstructionCost getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
TargetCostKind CostKind) override {
return Impl.getUserCost(U, Operands, CostKind);
}
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
bool useGPUDivergenceAnalysis() override {
return Impl.useGPUDivergenceAnalysis();
}
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
bool isAlwaysUniform(const Value *V) override {
return Impl.isAlwaysUniform(V);
}
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const override {
return Impl.collectFlatAddressOperands(OpIndexes, IID);
}
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
unsigned getAssumedAddrSpace(const Value *V) const override {
return Impl.getAssumedAddrSpace(V);
}
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) override {
return Impl.getPeelingPreferences(L, SE, PP);
}
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) override {
return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}
bool emitGetActiveLaneMask() override {
return Impl.emitGetActiveLaneMask();
}
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) override {
return Impl.instCombineIntrinsic(IC, II);
}
Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) override {
return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
KnownBitsComputed);
}
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) override {
return Impl.simplifyDemandedVectorEltsIntrinsic(
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
bool isLegalICmpImmediate(int64_t Imm) override {
return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
Instruction *I) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace, I);
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
bool isNumRegsMajorCostOfLSR() override {
return Impl.isNumRegsMajorCostOfLSR();
}
bool isProfitableLSRChainElement(Instruction *I) override {
return Impl.isProfitableLSRChainElement(I);
}
bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
AddressingModeKind
getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const override {
return Impl.getPreferredAddressingMode(L, SE);
}
bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedStore(DataType, Alignment);
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedLoad(DataType, Alignment);
}
bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedScatter(DataType, Alignment);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);
}
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
return Impl.hasVolatileVariant(I, AddrSpace);
}
bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();
}
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
unsigned AddrSpace) override {
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
AddrSpace);
}
bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
}
bool isProfitableToHoist(Instruction *I) override {
return Impl.isProfitableToHoist(I);
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
InstructionCost getRegUsageForType(Type *Ty) override {
return Impl.getRegUsageForType(Ty);
}
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
bool shouldBuildRelLookupTables() override {
return Impl.shouldBuildRelLookupTables();
}
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) override {
return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
InstructionCost
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys) override {
return Impl.getOperandsScalarizationOverhead(Args, Tys);
}
bool supportsEfficientVectorElementLoadStore() override {
return Impl.supportsEfficientVectorElementLoadStore();
}
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const override {
return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
bool enableMaskedInterleavedAccessVectorization() override {
return Impl.enableMaskedInterleavedAccessVectorization();
}
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost getFPOpCost(Type *Ty) override {
return Impl.getFPOpCost(Ty);
}
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TargetCostKind CostKind) override {
return Impl.getIntImmCost(Imm, Ty, CostKind);
}
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind,
Instruction *Inst = nullptr) override {
return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TargetCostKind CostKind) override {
return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
}
unsigned getNumberOfRegisters(unsigned ClassID) const override {
return Impl.getNumberOfRegisters(ClassID);
}
unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const override {
return Impl.getRegisterClassForType(Vector, Ty);
}
const char *getRegisterClassName(unsigned ClassID) const override {
return Impl.getRegisterClassName(ClassID);
}
TypeSize getRegisterBitWidth(RegisterKind K) const override {
return Impl.getRegisterBitWidth(K);
}
unsigned getMinVectorRegisterBitWidth() const override {
return Impl.getMinVectorRegisterBitWidth();
}
Optional<unsigned> getMaxVScale() const override {
return Impl.getMaxVScale();
}
bool shouldMaximizeVectorBandwidth() const override {
return Impl.shouldMaximizeVectorBandwidth();
}
ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const override {
return Impl.getMinimumVF(ElemWidth, IsScalable);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
return Impl.getMaximumVF(ElemWidth, Opcode);
}
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
/// Return the preferred prefetch distance in terms of instructions.
///
unsigned getPrefetchDistance() const override {
return Impl.getPrefetchDistance();
}
/// Return the minimum stride necessary to trigger software
/// prefetching.
///
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
unsigned NumStridedMemAccesses,
unsigned NumPrefetches,
bool HasCall) const override {
return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
NumPrefetches, HasCall);
}
/// Return the maximum prefetch distance in terms of loop
/// iterations.
///
unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
/// \return True if prefetching should also be done for writes.
bool enableWritePrefetching() const override {
return Impl.enableWritePrefetching();
}
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) override {
return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
VectorType *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,
unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) override {
return Impl.getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) override {
return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) override {
return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
}
InstructionCost
getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) override {
return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
}
InstructionCost
getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
}
InstructionCost getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
CostKind);
}
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) override {
return Impl.getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) override {
return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
}
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) override {
return Impl.getAddressComputationCost(Ty, SE, Ptr);
}
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
return Impl.getCostOfKeepingLiveOverCall(Tys);
}
bool getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) override {
return Impl.getTgtMemIntrinsic(Inst, Info);
}
unsigned getAtomicMemIntrinsicMaxElementSize() const override {
return Impl.getAtomicMemIntrinsicMaxElementSize();
}
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) override {
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign);
}
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
SrcAlign, DestAlign);
}
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
bool areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const override {
return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
}
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
}
bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
}
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
}
bool isLegalToVectorizeLoad(LoadInst *LI) const override {
return Impl.isLegalToVectorizeLoad(LI);
}
bool isLegalToVectorizeStore(StoreInst *SI) const override {
return Impl.isLegalToVectorizeStore(SI);
}
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const override {
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
bool isElementTypeLegalForScalableVector(Type *Ty) const override {
return Impl.isElementTypeLegalForScalableVector(Ty);
}
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.preferInLoopReduction(Opcode, Ty, Flags);
}
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const override {
return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
unsigned getGISelRematGlobalCost() const override {
return Impl.getGISelRematGlobalCost();
}
bool supportsScalableVectors() const override {
return Impl.supportsScalableVectors();
}
bool hasActiveVectorLength() const override {
return Impl.hasActiveVectorLength();
}
InstructionCost getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
return Impl.getVPLegalizationStrategy(PI);
}
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
: TTIImpl(new Model<T>(Impl)) {}
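// The Concept/Model pair above is a type-erasure wrapper: any type providing
// the expected methods can be handed to this constructor, which stores it
// behind the abstract Concept interface. A minimal sketch, assuming a
// hypothetical implementation type `MyTTIImpl`:
//
//   MyTTIImpl Impl(DL);
//   TargetTransformInfo TTI(std::move(Impl)); // wraps Impl in Model<MyTTIImpl>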
/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
typedef TargetTransformInfo Result;
/// Default construct a target IR analysis.
///
/// This will use the module's datalayout to construct a baseline
/// conservative TTI result.
TargetIRAnalysis();
/// Construct an IR analysis pass around a target-provide callback.
///
/// The callback will be called with a particular function for which the TTI
/// is needed and must return a TTI object for that function.
TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
// Value semantics. We spell out the constructors for MSVC.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
: TTICallback(Arg.TTICallback) {}
TargetIRAnalysis(TargetIRAnalysis &&Arg)
: TTICallback(std::move(Arg.TTICallback)) {}
TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
TTICallback = RHS.TTICallback;
return *this;
}
TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
TTICallback = std::move(RHS.TTICallback);
return *this;
}
Result run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetIRAnalysis>;
static AnalysisKey Key;
/// The callback used to produce a result.
///
/// We use a completely opaque callback so that targets can provide whatever
/// mechanism they desire for constructing the TTI for a given function.
///
/// FIXME: Should we really use std::function? It's relatively inefficient.
/// It might be possible to arrange for even stateful callbacks to outlive
/// the analysis and thus use a function_ref which would be lighter weight.
/// This may also be less error prone as the callback is likely to reference
/// the external TargetMachine, and that reference needs to never dangle.
std::function<Result(const Function &)> TTICallback;
/// Helper function used as the callback in the default constructor.
static Result getDefaultTTI(const Function &F);
};
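// A minimal usage sketch with the new pass manager, assuming a TargetMachine
// `TM` and a Function `F` are available (targets normally supply the callback
// themselves):
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TargetIRAnalysis(
//         [&](const Function &Fn) { return TM->getTargetTransformInfo(Fn); });
//   });
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);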
/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
TargetIRAnalysis TIRA;
Optional<TargetTransformInfo> TTI;
virtual void anchor();
public:
static char ID;
/// We must provide a default constructor for the pass but it should
/// never be used.
///
/// Use the constructor below or call one of the creation routines.
TargetTransformInfoWrapperPass();
explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
TargetTransformInfo &getTTI(const Function &F);
};
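// With the legacy pass manager, the wrapper pass is queried instead; a typical
// sketch from inside a FunctionPass, assuming the pass declared the
// TargetTransformInfoWrapperPass dependency in getAnalysisUsage:
//
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);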
/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
index 786fe908f68f..c63a5d42e9b3 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1,1287 +1,1287 @@
//===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Collect native machine code for a function. This class contains a list of
// MachineBasicBlock instances that make up the current compiled function.
//
// This class also contains pointers to various classes which hold
// target-specific information about the generated code.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
#define LLVM_CODEGEN_MACHINEFUNCTION_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
namespace llvm {
class BasicBlock;
class BlockAddress;
class DataLayout;
class DebugLoc;
struct DenormalMode;
class DIExpression;
class DILocalVariable;
class DILocation;
class Function;
class GISelChangeObserver;
class GlobalValue;
class LLVMTargetMachine;
class MachineConstantPool;
class MachineFrameInfo;
class MachineFunction;
class MachineJumpTableInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
class MCContext;
class MCInstrDesc;
class MCSymbol;
class MCSection;
class Pass;
class PseudoSourceValueManager;
class raw_ostream;
class SlotIndexes;
class StringRef;
class TargetRegisterClass;
class TargetSubtargetInfo;
struct WasmEHFuncInfo;
struct WinEHFuncInfo;
template <> struct ilist_alloc_traits<MachineBasicBlock> {
void deleteNode(MachineBasicBlock *MBB);
};
template <> struct ilist_callback_traits<MachineBasicBlock> {
void addNodeToList(MachineBasicBlock* N);
void removeNodeFromList(MachineBasicBlock* N);
template <class Iterator>
void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) {
assert(this == &OldList && "never transfer MBBs between functions");
}
};
/// MachineFunctionInfo - This class can be derived from and used by targets to
/// hold private target-specific information for each MachineFunction. Objects
/// of this type are accessed/created with MF::getInfo and destroyed when the
/// MachineFunction is destroyed.
struct MachineFunctionInfo {
virtual ~MachineFunctionInfo();
/// Factory function: default behavior is to call new using the
/// supplied allocator.
///
/// This function can be overridden in a derived class.
template<typename Ty>
static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) {
return new (Allocator.Allocate<Ty>()) Ty(MF);
}
};
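// A minimal sketch of the intended use, with a hypothetical target-specific
// subclass `MyTargetFunctionInfo`; MF.getInfo (declared further down in
// MachineFunction) lazily creates it through the factory above:
//
//   struct MyTargetFunctionInfo : MachineFunctionInfo {
//     int VarArgsFrameIndex = 0;
//     explicit MyTargetFunctionInfo(MachineFunction &MF) {}
//   };
//   ...
//   auto *FI = MF.getInfo<MyTargetFunctionInfo>();
//   FI->VarArgsFrameIndex = FrameIdx; // FrameIdx is an assumed stack slot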
/// Properties which a MachineFunction may have at a given point in time.
/// Each of these has checking code in the MachineVerifier, and passes can
/// require that a property be set.
class MachineFunctionProperties {
// Possible TODO: Allow targets to extend this (perhaps by allowing the
// constructor to specify the size of the bit vector)
// Possible TODO: Allow requiring the negative (e.g. VRegsAllocated could be
// stated as the negative of "has vregs")
public:
// The properties are stated in "positive" form; i.e. a pass could require
// that the property hold, but not that it does not hold.
// Property descriptions:
// IsSSA: True when the machine function is in SSA form and virtual registers
// have a single def.
// NoPHIs: The machine function does not contain any PHI instruction.
// TracksLiveness: True when tracking register liveness accurately.
// While this property is set, register liveness information in basic block
// live-in lists and machine instruction operands (e.g. implicit defs) is
// accurate, kill flags are conservatively accurate (kill flag correctly
// indicates the last use of a register, an operand without kill flag may or
// may not be the last use of a register). This means it can be used to
// change the code in ways that affect the values in registers, for example
// by the register scavenger.
// When this property is cleared at a very late time, liveness is no longer
// reliable.
// NoVRegs: The machine function does not use any virtual registers.
// Legalized: In GlobalISel: the MachineLegalizer ran and all pre-isel generic
// instructions have been legalized; i.e., all instructions are now one of:
// - generic and always legal (e.g., COPY)
// - target-specific
// - legal pre-isel generic instructions.
// RegBankSelected: In GlobalISel: the RegBankSelect pass ran and all generic
// virtual registers have been assigned to a register bank.
// Selected: In GlobalISel: the InstructionSelect pass ran and all pre-isel
// generic instructions have been eliminated; i.e., all instructions are now
// target-specific or non-pre-isel generic instructions (e.g., COPY).
// Since only pre-isel generic instructions can have generic virtual register
// operands, this also means that all generic virtual registers have been
// constrained to virtual registers (assigned to register classes) and that
// all sizes attached to them have been eliminated.
// TiedOpsRewritten: The TwoAddressInstruction pass will set this flag; it
// means that tied defs have been rewritten to meet the RegConstraint.
enum class Property : unsigned {
IsSSA,
NoPHIs,
TracksLiveness,
NoVRegs,
FailedISel,
Legalized,
RegBankSelected,
Selected,
TiedOpsRewritten,
LastProperty = TiedOpsRewritten,
};
bool hasProperty(Property P) const {
return Properties[static_cast<unsigned>(P)];
}
MachineFunctionProperties &set(Property P) {
Properties.set(static_cast<unsigned>(P));
return *this;
}
MachineFunctionProperties &reset(Property P) {
Properties.reset(static_cast<unsigned>(P));
return *this;
}
/// Reset all the properties.
MachineFunctionProperties &reset() {
Properties.reset();
return *this;
}
MachineFunctionProperties &set(const MachineFunctionProperties &MFP) {
Properties |= MFP.Properties;
return *this;
}
MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) {
Properties.reset(MFP.Properties);
return *this;
}
// Returns true if all properties set in V (i.e. required by a pass) are set
// in this.
bool verifyRequiredProperties(const MachineFunctionProperties &V) const {
return !V.Properties.test(Properties);
}
/// Print the MachineFunctionProperties in human-readable form.
void print(raw_ostream &OS) const;
private:
BitVector Properties =
BitVector(static_cast<unsigned>(Property::LastProperty)+1);
};
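// For example, a late pass that has eliminated all virtual registers might
// record that fact, and another pass can query the current state (a sketch
// using only the accessors above, with `MF` an assumed MachineFunction):
//
//   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
//   ...
//   if (MF.getProperties().hasProperty(
//           MachineFunctionProperties::Property::TracksLiveness)) {
//     // Live-in lists and kill flags can be relied upon here.
//   }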
struct SEHHandler {
/// Filter or finally function. Null indicates a catch-all.
const Function *FilterOrFinally;
/// Address of block to recover at. Null for a finally handler.
const BlockAddress *RecoverBA;
};
/// This structure is used to retain landing pad info for the current function.
struct LandingPadInfo {
MachineBasicBlock *LandingPadBlock; // Landing pad block.
SmallVector<MCSymbol *, 1> BeginLabels; // Labels prior to invoke.
SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
SmallVector<SEHHandler, 1> SEHHandlers; // SEH handlers active at this lpad.
MCSymbol *LandingPadLabel = nullptr; // Label at beginning of landing pad.
std::vector<int> TypeIds; // List of type ids (filters negative).
explicit LandingPadInfo(MachineBasicBlock *MBB)
: LandingPadBlock(MBB) {}
};
-class MachineFunction {
+class LLVM_EXTERNAL_VISIBILITY MachineFunction {
Function &F;
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
MCContext &Ctx;
MachineModuleInfo &MMI;
// RegInfo - Information about each register in use in the function.
MachineRegisterInfo *RegInfo;
// Used to keep track of target-specific per-machine function information for
// the target implementation.
MachineFunctionInfo *MFInfo;
// Keep track of objects allocated on the stack.
MachineFrameInfo *FrameInfo;
// Keep track of constants which are spilled to memory
MachineConstantPool *ConstantPool;
// Keep track of jump tables for switch instructions
MachineJumpTableInfo *JumpTableInfo;
// Keep track of the function section.
MCSection *Section = nullptr;
// Keeps track of Wasm exception handling related data. This will be null for
// functions that aren't using a wasm EH personality.
WasmEHFuncInfo *WasmEHInfo = nullptr;
// Keeps track of Windows exception handling related data. This will be null
// for functions that aren't using a funclet-based EH personality.
WinEHFuncInfo *WinEHInfo = nullptr;
// Function-level unique numbering for MachineBasicBlocks. When a
// MachineBasicBlock is inserted into a MachineFunction, it is automatically
// numbered, and this vector keeps track of the mapping from IDs to MBBs.
std::vector<MachineBasicBlock*> MBBNumbering;
// Unary encoding of basic block symbols is used to reduce size of ".strtab".
// Basic block number 'i' gets a prefix of length 'i'. The ith character also
// denotes the type of basic block number 'i'. Return blocks are marked with
// 'r', landing pads with 'l' and regular blocks with 'a'.
std::vector<char> BBSectionsSymbolPrefix;
// Pool-allocate MachineFunction-lifetime and IR objects.
BumpPtrAllocator Allocator;
// Allocation management for instructions in function.
Recycler<MachineInstr> InstructionRecycler;
// Allocation management for operand arrays on instructions.
ArrayRecycler<MachineOperand> OperandRecycler;
// Allocation management for basic blocks in function.
Recycler<MachineBasicBlock> BasicBlockRecycler;
// List of machine basic blocks in function
using BasicBlockListType = ilist<MachineBasicBlock>;
BasicBlockListType BasicBlocks;
/// FunctionNumber - This provides a unique ID for each function emitted in
/// this translation unit.
///
unsigned FunctionNumber;
/// Alignment - The alignment of the function.
Align Alignment;
/// ExposesReturnsTwice - True if the function calls setjmp or related
/// functions with attribute "returns twice", but doesn't have
/// the attribute itself.
/// This is used to limit optimizations which cannot reason
/// about the control flow of such functions.
bool ExposesReturnsTwice = false;
/// True if the function includes any inline assembly.
bool HasInlineAsm = false;
/// True if any WinCFI instruction have been emitted in this function.
bool HasWinCFI = false;
/// Current high-level properties of the IR of the function (e.g. is in SSA
/// form or whether registers have been allocated)
MachineFunctionProperties Properties;
// Allocation management for pseudo source values.
std::unique_ptr<PseudoSourceValueManager> PSVManager;
/// List of moves done by a function's prolog. Used to construct frame maps
/// by debug and exception handling consumers.
std::vector<MCCFIInstruction> FrameInstructions;
/// List of basic blocks immediately following calls to _setjmp. Used to
/// construct a table of valid longjmp targets for Windows Control Flow Guard.
std::vector<MCSymbol *> LongjmpTargets;
/// List of basic blocks that are the target of catchrets. Used to construct
/// a table of valid targets for Windows EHCont Guard.
std::vector<MCSymbol *> CatchretTargets;
/// \name Exception Handling
/// \{
/// List of LandingPadInfo describing the landing pad information.
std::vector<LandingPadInfo> LandingPads;
/// Map a landing pad's EH symbol to the call site indexes.
DenseMap<MCSymbol*, SmallVector<unsigned, 4>> LPadToCallSiteMap;
/// Map a landing pad to its index.
DenseMap<const MachineBasicBlock *, unsigned> WasmLPadToIndexMap;
/// Map of invoke call site index values to associated begin EH_LABEL.
DenseMap<MCSymbol*, unsigned> CallSiteMap;
/// CodeView label annotations.
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
bool CallsEHReturn = false;
bool CallsUnwindInit = false;
bool HasEHCatchret = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
/// Section Type for basic blocks, only relevant with basic block sections.
BasicBlockSection BBSectionsType = BasicBlockSection::None;
/// List of C++ TypeInfo used.
std::vector<const GlobalValue *> TypeInfos;
/// List of typeids encoding filters used.
std::vector<unsigned> FilterIds;
/// List of the indices in FilterIds corresponding to filter terminators.
std::vector<unsigned> FilterEnds;
EHPersonality PersonalityTypeCache = EHPersonality::Unknown;
/// \}
/// Clear all the members of this MachineFunction, except the ones used
/// to initialize the MachineFunction again.
/// More specifically, this deallocates all the dynamically allocated
/// objects and gets rid of all the XXXInfo data structures, but keeps
/// the references to Fn, Target, MMI, and FunctionNumber unchanged.
void clear();
/// Allocate and initialize the different members.
/// In particular, the XXXInfo data structure.
/// \pre Fn, Target, MMI, and FunctionNumber are properly set.
void init();
public:
struct VariableDbgInfo {
const DILocalVariable *Var;
const DIExpression *Expr;
// The Slot can be negative for fixed stack objects.
int Slot;
const DILocation *Loc;
VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc)
: Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
};
class Delegate {
virtual void anchor();
public:
virtual ~Delegate() = default;
/// Callback after an insertion. This should not modify the MI directly.
virtual void MF_HandleInsertion(MachineInstr &MI) = 0;
/// Callback before a removal. This should not modify the MI directly.
virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
};
/// Structure used to represent a pair of an argument number (after call
/// lowering) and the register used to transfer that argument.
/// For now we support only cases where the argument is transferred through
/// one register.
struct ArgRegPair {
Register Reg;
uint16_t ArgNo;
ArgRegPair(Register R, unsigned Arg) : Reg(R), ArgNo(Arg) {
assert(Arg < (1 << 16) && "Arg out of range");
}
};
/// Vector of call argument and its forwarding register.
using CallSiteInfo = SmallVector<ArgRegPair, 1>;
using CallSiteInfoImpl = SmallVectorImpl<ArgRegPair>;
private:
Delegate *TheDelegate = nullptr;
GISelChangeObserver *Observer = nullptr;
using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
/// Map a call instruction to call site arguments forwarding info.
CallSiteInfoMap CallSitesInfo;
/// A helper function that returns call site info for a given call
/// instruction if debug entry value support is enabled.
CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI);
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
void handleRemoval(MachineInstr &MI);
friend struct ilist_traits<MachineInstr>;
public:
using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>;
VariableDbgInfoMapTy VariableDbgInfos;
/// A count of how many instructions in the function have had numbers
/// assigned to them. Used for debug value tracking, to determine the
/// next instruction number.
unsigned DebugInstrNumberingCount = 0;
/// Set value of DebugInstrNumberingCount field. Avoid using this unless
/// you're deserializing this data.
void setDebugInstrNumberingCount(unsigned Num);
/// Pair of instruction number and operand number.
using DebugInstrOperandPair = std::pair<unsigned, unsigned>;
/// Replacement definition for a debug instruction reference. Made up of a
/// source instruction / operand pair, destination pair, and a qualifying
/// subregister indicating what bits in the operand make up the substitution.
/// For example, a debug user
/// of %1:
/// %0:gr32 = someinst, debug-instr-number 1
/// %1:gr16 = %0.some_16_bit_subreg, debug-instr-number 2
/// Would receive the substitution {{2, 0}, {1, 0}, $subreg}, where $subreg is
/// the subregister number for some_16_bit_subreg.
class DebugSubstitution {
public:
DebugInstrOperandPair Src; ///< Source instruction / operand pair.
DebugInstrOperandPair Dest; ///< Replacement instruction / operand pair.
unsigned Subreg; ///< Qualifier for which part of Dest is read.
DebugSubstitution(const DebugInstrOperandPair &Src,
const DebugInstrOperandPair &Dest, unsigned Subreg)
: Src(Src), Dest(Dest), Subreg(Subreg) {}
/// Order only by source instruction / operand pair: there should never
/// be duplicate entries for the same source in any collection.
bool operator<(const DebugSubstitution &Other) const {
return Src < Other.Src;
}
};
/// Debug value substitutions: a collection of DebugSubstitution objects,
/// recording changes in where a value is defined. For example, when one
/// instruction is substituted for another. Keeping a record allows recovery
/// of variable locations after compilation finishes.
SmallVector<DebugSubstitution, 8> DebugValueSubstitutions;
/// Location of a PHI instruction that is also a debug-info variable value,
/// for the duration of register allocation. Loaded by the PHI-elimination
/// pass, and emitted as DBG_PHI instructions during VirtRegRewriter, with
/// maintenance applied by intermediate passes that edit registers (such as
/// coalescing and the allocator passes).
class DebugPHIRegallocPos {
public:
MachineBasicBlock *MBB; ///< Block where this PHI was originally located.
Register Reg; ///< VReg where the control-flow-merge happens.
unsigned SubReg; ///< Optional subreg qualifier within Reg.
DebugPHIRegallocPos(MachineBasicBlock *MBB, Register Reg, unsigned SubReg)
: MBB(MBB), Reg(Reg), SubReg(SubReg) {}
};
/// Map of debug instruction numbers to the position of their PHI instructions
/// during register allocation. See DebugPHIRegallocPos.
DenseMap<unsigned, DebugPHIRegallocPos> DebugPHIPositions;
/// Create a substitution from one <instr,operand> value to a different,
/// new value.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair,
unsigned SubReg = 0);
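// For the DebugSubstitution example above, the substitution
// {{2, 0}, {1, 0}, $subreg} would be recorded roughly as follows (a sketch;
// `SubregIdx` is an assumed subregister index):
//
//   MF.makeDebugValueSubstitution({2, 0}, {1, 0}, SubregIdx);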
/// Create substitutions for any tracked values in \p Old, to point at
/// \p New. Needed when we re-create an instruction during optimization,
/// which has the same signature (i.e., def operands in the same place) but
/// a modified instruction type, flags, or otherwise. An example: X86 moves
/// are sometimes transformed into equivalent LEAs.
/// If the two instructions are not the same opcode, limit which operands to
/// examine for substitutions to the first N operands by setting
/// \p MaxOperand.
void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New,
unsigned MaxOperand = UINT_MAX);
/// Find the underlying defining instruction / operand for a COPY instruction
/// while in SSA form. Copies do not actually define values -- they move them
/// between registers. Labelling a COPY-like instruction with an instruction
/// number is to be avoided as it makes value numbers non-unique later in
/// compilation. This method follows the definition chain for any sequence of
/// COPY-like instructions to find whatever non-COPY-like instruction defines
/// the copied value; or for parameters, creates a DBG_PHI on entry.
/// May insert instructions into the entry block!
/// \p MI The copy-like instruction to salvage.
/// \returns An instruction/operand pair identifying the defining value.
DebugInstrOperandPair salvageCopySSA(MachineInstr &MI);
/// Finalise any partially emitted debug instructions. These are DBG_INSTR_REF
/// instructions where we only knew the vreg of the value they use, not the
/// instruction that defines that vreg. Once isel finishes, we should have
/// enough information for every DBG_INSTR_REF to point at an instruction
/// (or DBG_PHI).
void finalizeDebugInstrRefs();
MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
MachineFunction(const MachineFunction &) = delete;
MachineFunction &operator=(const MachineFunction &) = delete;
~MachineFunction();
/// Reset the instance as if it was just created.
void reset() {
clear();
init();
}
/// Reset the currently registered delegate - otherwise assert.
void resetDelegate(Delegate *delegate) {
assert(TheDelegate == delegate &&
"Only the current delegate can perform reset!");
TheDelegate = nullptr;
}
/// Set the delegate. resetDelegate must be called before attempting
/// to set.
void setDelegate(Delegate *delegate) {
assert(delegate && !TheDelegate &&
"Attempted to set delegate to null, or to change it without "
"first resetting it!");
TheDelegate = delegate;
}
void setObserver(GISelChangeObserver *O) { Observer = O; }
GISelChangeObserver *getObserver() const { return Observer; }
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
/// Returns the Section this function belongs to.
MCSection *getSection() const { return Section; }
/// Indicates the Section this function belongs to.
void setSection(MCSection *S) { Section = S; }
PseudoSourceValueManager &getPSVManager() const { return *PSVManager; }
/// Return the DataLayout attached to the Module associated to this MF.
const DataLayout &getDataLayout() const;
/// Return the LLVM function that this machine code represents
Function &getFunction() { return F; }
/// Return the LLVM function that this machine code represents
const Function &getFunction() const { return F; }
/// getName - Return the name of the corresponding LLVM function.
StringRef getName() const;
/// getFunctionNumber - Return a unique ID for the current function.
unsigned getFunctionNumber() const { return FunctionNumber; }
/// Returns true if this function has basic block sections enabled.
bool hasBBSections() const {
return (BBSectionsType == BasicBlockSection::All ||
BBSectionsType == BasicBlockSection::List ||
BBSectionsType == BasicBlockSection::Preset);
}
/// Returns true if basic block labels are to be generated for this function.
bool hasBBLabels() const {
return BBSectionsType == BasicBlockSection::Labels;
}
void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; }
/// Assign the IsBeginSection and IsEndSection fields for basic blocks in this
/// function.
void assignBeginEndSections();
/// getTarget - Return the target machine this machine code is compiled with
const LLVMTargetMachine &getTarget() const { return Target; }
/// getSubtarget - Return the subtarget for which this machine code is being
/// compiled.
const TargetSubtargetInfo &getSubtarget() const { return *STI; }
/// getSubtarget - This method returns a reference to the specified type of
/// TargetSubtargetInfo. In debug builds, it verifies that the object being
/// returned is of the correct type.
template<typename STC> const STC &getSubtarget() const {
return *static_cast<const STC *>(STI);
}
/// getRegInfo - Return information about the registers currently in use.
MachineRegisterInfo &getRegInfo() { return *RegInfo; }
const MachineRegisterInfo &getRegInfo() const { return *RegInfo; }
/// getFrameInfo - Return the frame info object for the current function.
/// This object contains information about objects allocated on the stack
/// frame of the current function in an abstract way.
MachineFrameInfo &getFrameInfo() { return *FrameInfo; }
const MachineFrameInfo &getFrameInfo() const { return *FrameInfo; }
/// getJumpTableInfo - Return the jump table info object for the current
/// function. This object contains information about jump tables in the
/// current function. If the current function has no jump tables, this will
/// return null.
const MachineJumpTableInfo *getJumpTableInfo() const { return JumpTableInfo; }
MachineJumpTableInfo *getJumpTableInfo() { return JumpTableInfo; }
/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it
/// does not already exist, allocate one.
MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind);
/// getConstantPool - Return the constant pool object for the current
/// function.
MachineConstantPool *getConstantPool() { return ConstantPool; }
const MachineConstantPool *getConstantPool() const { return ConstantPool; }
/// getWasmEHFuncInfo - Return information about how the current function uses
/// Wasm exception handling. Returns null for functions that don't use wasm
/// exception handling.
const WasmEHFuncInfo *getWasmEHFuncInfo() const { return WasmEHInfo; }
WasmEHFuncInfo *getWasmEHFuncInfo() { return WasmEHInfo; }
/// getWinEHFuncInfo - Return information about how the current function uses
/// Windows exception handling. Returns null for functions that don't use
/// funclets for exception handling.
const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; }
WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; }
/// getAlignment - Return the alignment of the function.
Align getAlignment() const { return Alignment; }
/// setAlignment - Set the alignment of the function.
void setAlignment(Align A) { Alignment = A; }
/// ensureAlignment - Make sure the function is at least A bytes aligned.
void ensureAlignment(Align A) {
if (Alignment < A)
Alignment = A;
}
/// exposesReturnsTwice - Returns true if the function calls setjmp or
/// any other similar functions with attribute "returns twice" without
/// having the attribute itself.
bool exposesReturnsTwice() const {
return ExposesReturnsTwice;
}
/// setExposesReturnsTwice - Set a flag that indicates if there's a call to
/// a "returns twice" function.
void setExposesReturnsTwice(bool B) {
ExposesReturnsTwice = B;
}
/// Returns true if the function contains any inline assembly.
bool hasInlineAsm() const {
return HasInlineAsm;
}
/// Set a flag that indicates that the function contains inline assembly.
void setHasInlineAsm(bool B) {
HasInlineAsm = B;
}
bool hasWinCFI() const {
return HasWinCFI;
}
void setHasWinCFI(bool v) { HasWinCFI = v; }
/// True if this function needs frame moves for debug or exceptions.
bool needsFrameMoves() const;
/// Get the function properties
const MachineFunctionProperties &getProperties() const { return Properties; }
MachineFunctionProperties &getProperties() { return Properties; }
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
///
template<typename Ty>
Ty *getInfo() {
if (!MFInfo)
MFInfo = Ty::template create<Ty>(Allocator, *this);
return static_cast<Ty*>(MFInfo);
}
template<typename Ty>
const Ty *getInfo() const {
return const_cast<MachineFunction*>(this)->getInfo<Ty>();
}
/// Returns the denormal handling type for the default rounding mode of the
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
/// getBlockNumbered - MachineBasicBlocks are automatically numbered when they
/// are inserted into the machine function. The block number for a machine
/// basic block can be found by using the MBB::getNumber method; this method
/// provides the inverse mapping.
MachineBasicBlock *getBlockNumbered(unsigned N) const {
assert(N < MBBNumbering.size() && "Illegal block number");
assert(MBBNumbering[N] && "Block was removed from the machine function!");
return MBBNumbering[N];
}
/// Should we be emitting segmented stack support for this function?
bool shouldSplitStack() const;
/// getNumBlockIDs - Return the number of MBB ID's allocated.
unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
/// recomputes them. This guarantees that the MBB numbers are sequential,
/// dense, and match the ordering of the blocks within the function. If a
/// specific MachineBasicBlock is specified, only that block and those after
/// it are renumbered.
void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr);
/// print - Print out the MachineFunction in a format suitable for debugging
/// to the specified stream.
void print(raw_ostream &OS, const SlotIndexes* = nullptr) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
/// basic block inside. This depends on there being a 'dot' and 'gv' program
/// in your path.
void viewCFG() const;
/// viewCFGOnly - This function is meant for use from the debugger. It works
/// just like viewCFG, but it does not include the contents of basic blocks
/// into the nodes, just the label. If you are only interested in the CFG
/// this can make the graph smaller.
///
void viewCFGOnly() const;
/// dump - Print the current MachineFunction to cerr, useful for debugger use.
void dump() const;
/// Run the current MachineFunction through the machine code verifier, useful
/// for debugger use.
/// \returns true if no problems were found.
bool verify(Pass *p = nullptr, const char *Banner = nullptr,
bool AbortOnError = true) const;
// Provide accessors for the MachineBasicBlock list...
using iterator = BasicBlockListType::iterator;
using const_iterator = BasicBlockListType::const_iterator;
using const_reverse_iterator = BasicBlockListType::const_reverse_iterator;
using reverse_iterator = BasicBlockListType::reverse_iterator;
/// Support for MachineBasicBlock::getNextNode().
static BasicBlockListType MachineFunction::*
getSublistAccess(MachineBasicBlock *) {
return &MachineFunction::BasicBlocks;
}
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC);
//===--------------------------------------------------------------------===//
// BasicBlock accessor functions.
//
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
iterator end () { return BasicBlocks.end(); }
const_iterator end () const { return BasicBlocks.end(); }
reverse_iterator rbegin() { return BasicBlocks.rbegin(); }
const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
reverse_iterator rend () { return BasicBlocks.rend(); }
const_reverse_iterator rend () const { return BasicBlocks.rend(); }
unsigned size() const { return (unsigned)BasicBlocks.size();}
bool empty() const { return BasicBlocks.empty(); }
const MachineBasicBlock &front() const { return BasicBlocks.front(); }
MachineBasicBlock &front() { return BasicBlocks.front(); }
const MachineBasicBlock & back() const { return BasicBlocks.back(); }
MachineBasicBlock & back() { return BasicBlocks.back(); }
void push_back (MachineBasicBlock *MBB) { BasicBlocks.push_back (MBB); }
void push_front(MachineBasicBlock *MBB) { BasicBlocks.push_front(MBB); }
void insert(iterator MBBI, MachineBasicBlock *MBB) {
BasicBlocks.insert(MBBI, MBB);
}
void splice(iterator InsertPt, iterator MBBI) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI);
}
void splice(iterator InsertPt, MachineBasicBlock *MBB) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBB);
}
void splice(iterator InsertPt, iterator MBBI, iterator MBBE) {
BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE);
}
void remove(iterator MBBI) { BasicBlocks.remove(MBBI); }
void remove(MachineBasicBlock *MBBI) { BasicBlocks.remove(MBBI); }
void erase(iterator MBBI) { BasicBlocks.erase(MBBI); }
void erase(MachineBasicBlock *MBBI) { BasicBlocks.erase(MBBI); }
template <typename Comp>
void sort(Comp comp) {
BasicBlocks.sort(comp);
}
/// Return the number of \p MachineInstrs in this \p MachineFunction.
unsigned getInstructionCount() const {
unsigned InstrCount = 0;
for (const MachineBasicBlock &MBB : BasicBlocks)
InstrCount += MBB.size();
return InstrCount;
}
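/// A typical traversal over the blocks and instructions of a function, e.g.
/// from within a pass (sketch, assuming an existing MachineFunction &MF):
/// \code
///   for (MachineBasicBlock &MBB : MF)
///     for (MachineInstr &MI : MBB)
///       (void)MI; // inspect or transform MI here
/// \endcode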
//===--------------------------------------------------------------------===//
// Internal functions used to automatically number MachineBasicBlocks
/// Adds the MBB to the internal numbering. Returns the unique number
/// assigned to the MBB.
unsigned addToMBBNumbering(MachineBasicBlock *MBB) {
MBBNumbering.push_back(MBB);
return (unsigned)MBBNumbering.size()-1;
}
/// removeFromMBBNumbering - Remove the specified machine basic block from our
/// tracker; this is only intended to be used by the MachineBasicBlock
/// implementation.
void removeFromMBBNumbering(unsigned N) {
assert(N < MBBNumbering.size() && "Illegal basic block #");
MBBNumbering[N] = nullptr;
}
/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
/// of `new MachineInstr'.
MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL,
bool NoImplicit = false);
/// Create a new MachineInstr which is a copy of \p Orig, identical in all
/// ways except the instruction has no parent, prev, or next. Bundling flags
/// are reset.
///
/// Note: Clones a single instruction, not whole instruction bundles.
/// Does not perform target specific adjustments; consider using
/// TargetInstrInfo::duplicate() instead.
MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
/// Clones the instruction or the whole instruction bundle \p Orig and inserts
/// it into \p MBB before \p InsertBefore.
///
/// Note: Does not perform target specific adjustments; consider using
/// TargetInstrInfo::duplicate() instead.
MachineInstr &CloneMachineInstrBundle(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig);
/// DeleteMachineInstr - Delete the given MachineInstr.
void DeleteMachineInstr(MachineInstr *MI);
/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
/// instead of `new MachineBasicBlock'.
MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr);
/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
void DeleteMachineBasicBlock(MachineBasicBlock *MBB);
/// getMachineMemOperand - Allocate a new MachineMemOperand.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
MachineMemOperand *getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy,
Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System,
AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
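/// A sketch of creating a memory operand for a 4-byte, 4-byte-aligned stack
/// load (hypothetical frame index FI; assumes an existing MachineFunction &MF):
/// \code
///   MachineMemOperand *MMO = MF.getMachineMemOperand(
///       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
///       4, Align(4));
/// \endcode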
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, adjusting by an offset and using the given size.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, LLT Ty);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
}
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, replacing only the MachinePointerInfo and size.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const MachinePointerInfo &PtrInfo,
uint64_t Size);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const MachinePointerInfo &PtrInfo,
LLT Ty);
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing only AliasAnalysis information. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const AAMDNodes &AAInfo);
/// Allocate a new MachineMemOperand by copying an existing one,
/// replacing the flags. MachineMemOperands are owned
/// by the MachineFunction and need not be explicitly deallocated.
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand::Flags Flags);
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
/// Allocate an array of MachineOperands. This is only intended for use by
/// internal MachineInstr functions.
MachineOperand *allocateOperandArray(OperandCapacity Cap) {
return OperandRecycler.allocate(Cap, Allocator);
}
/// Deallocate an array of MachineOperands and recycle the memory. This is
/// only intended for use by internal MachineInstr functions.
/// Cap must be the same capacity that was used to allocate the array.
void deallocateOperandArray(OperandCapacity Cap, MachineOperand *Array) {
OperandRecycler.deallocate(Cap, Array);
}
/// Allocate and initialize a register mask with a bit for each physical
/// register.
uint32_t *allocateRegMask();
ArrayRef<int> allocateShuffleMask(ArrayRef<int> Mask);
/// Allocate and construct an extra info structure for a `MachineInstr`.
///
/// This is allocated on the function's allocator and so lives the life of
/// the function.
MachineInstr::ExtraInfo *createMIExtraInfo(
ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol = nullptr,
MCSymbol *PostInstrSymbol = nullptr, MDNode *HeapAllocMarker = nullptr);
/// Allocate a string and populate it with the given external symbol name.
const char *createExternalSymbolName(StringRef Name);
//===--------------------------------------------------------------------===//
// Label Manipulation.
/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned; otherwise a
/// normal 'L' label is returned.
MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate = false) const;
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *getPICBaseSymbol() const;
/// Returns a reference to a list of cfi instructions in the function's
/// prologue. Used to construct frame maps for debug and exception handling
/// consumers.
const std::vector<MCCFIInstruction> &getFrameInstructions() const {
return FrameInstructions;
}
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
/// Returns a reference to a list of symbols immediately following calls to
/// _setjmp in the function. Used to construct the longjmp target table used
/// by Windows Control Flow Guard.
const std::vector<MCSymbol *> &getLongjmpTargets() const {
return LongjmpTargets;
}
/// Add the specified symbol to the list of valid longjmp targets for Windows
/// Control Flow Guard.
void addLongjmpTarget(MCSymbol *Target) { LongjmpTargets.push_back(Target); }
/// Returns a reference to a list of symbols for which we have catchrets.
/// Used to construct the catchret target table used by Windows EHCont Guard.
const std::vector<MCSymbol *> &getCatchretTargets() const {
return CatchretTargets;
}
/// Add the specified symbol to the list of valid catchret targets for Windows
/// EHCont Guard.
void addCatchretTarget(MCSymbol *Target) {
CatchretTargets.push_back(Target);
}
/// \name Exception Handling
/// \{
bool callsEHReturn() const { return CallsEHReturn; }
void setCallsEHReturn(bool b) { CallsEHReturn = b; }
bool callsUnwindInit() const { return CallsUnwindInit; }
void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
bool hasEHCatchret() const { return HasEHCatchret; }
void setHasEHCatchret(bool V) { HasEHCatchret = V; }
bool hasEHScopes() const { return HasEHScopes; }
void setHasEHScopes(bool V) { HasEHScopes = V; }
bool hasEHFunclets() const { return HasEHFunclets; }
void setHasEHFunclets(bool V) { HasEHFunclets = V; }
/// Find or create a LandingPadInfo for the specified MachineBasicBlock.
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
/// Remap landing pad labels and remove any deleted landing pads.
void tidyLandingPads(DenseMap<MCSymbol *, uintptr_t> *LPMap = nullptr,
bool TidyIfNoBeginLabels = true);
/// Return a reference to the landing pad info for the current function.
const std::vector<LandingPadInfo> &getLandingPads() const {
return LandingPads;
}
/// Provide the begin and end labels of an invoke style call and associate it
/// with a try landing pad block.
void addInvoke(MachineBasicBlock *LandingPad,
MCSymbol *BeginLabel, MCSymbol *EndLabel);
/// Add a new landing pad, and extract the exception handling information from
/// the landingpad instruction. Returns the label ID for the landing pad
/// entry.
MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
/// Provide the catch typeinfo for a landing pad.
void addCatchTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo);
/// Provide the filter typeinfo for a landing pad.
void addFilterTypeInfo(MachineBasicBlock *LandingPad,
ArrayRef<const GlobalValue *> TyInfo);
/// Add a cleanup action for a landing pad.
void addCleanup(MachineBasicBlock *LandingPad);
void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
const BlockAddress *RecoverBA);
void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
const Function *Cleanup);
/// Return the type id for the specified typeinfo. This is function wide.
unsigned getTypeIDFor(const GlobalValue *TI);
/// Return the id of the filter encoded by TyIds. This is function wide.
int getFilterIDFor(std::vector<unsigned> &TyIds);
/// Map the landing pad's EH symbol to the call site indexes.
void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
/// Map the landing pad to its index. Used for Wasm exception handling.
void setWasmLandingPadIndex(const MachineBasicBlock *LPad, unsigned Index) {
WasmLPadToIndexMap[LPad] = Index;
}
/// Returns true if the landing pad has an associated index in wasm EH.
bool hasWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
return WasmLPadToIndexMap.count(LPad);
}
/// Get the index in wasm EH for a given landing pad.
unsigned getWasmLandingPadIndex(const MachineBasicBlock *LPad) const {
assert(hasWasmLandingPadIndex(LPad));
return WasmLPadToIndexMap.lookup(LPad);
}
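/// A sketch of recording and querying a Wasm EH index (assuming an existing
/// MachineFunction &MF and a landing-pad block LPad):
/// \code
///   MF.setWasmLandingPadIndex(LPad, 0);
///   if (MF.hasWasmLandingPadIndex(LPad)) {
///     unsigned Idx = MF.getWasmLandingPadIndex(LPad);
///     (void)Idx;
///   }
/// \endcode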
/// Get the call site indexes for a landing pad EH symbol.
SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
assert(hasCallSiteLandingPad(Sym) &&
"missing call site number for landing pad!");
return LPadToCallSiteMap[Sym];
}
/// Return true if the landing pad EH symbol has an associated call site.
bool hasCallSiteLandingPad(MCSymbol *Sym) {
return !LPadToCallSiteMap[Sym].empty();
}
/// Map the begin label for a call site.
void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
CallSiteMap[BeginLabel] = Site;
}
/// Get the call site number for a begin label.
unsigned getCallSiteBeginLabel(MCSymbol *BeginLabel) const {
assert(hasCallSiteBeginLabel(BeginLabel) &&
"Missing call site number for EH_LABEL!");
return CallSiteMap.lookup(BeginLabel);
}
/// Return true if the begin label has a call site number associated with it.
bool hasCallSiteBeginLabel(MCSymbol *BeginLabel) const {
return CallSiteMap.count(BeginLabel);
}
/// Record annotations associated with a particular label.
void addCodeViewAnnotation(MCSymbol *Label, MDNode *MD) {
CodeViewAnnotations.push_back({Label, MD});
}
ArrayRef<std::pair<MCSymbol *, MDNode *>> getCodeViewAnnotations() const {
return CodeViewAnnotations;
}
/// Return a reference to the C++ typeinfo for the current function.
const std::vector<const GlobalValue *> &getTypeInfos() const {
return TypeInfos;
}
/// Return a reference to the typeids encoding filters used in the current
/// function.
const std::vector<unsigned> &getFilterIds() const {
return FilterIds;
}
/// \}
/// Collect information used to emit debugging information of a variable.
void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc) {
VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
}
VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
const VariableDbgInfoMapTy &getVariableDbgInfo() const {
return VariableDbgInfos;
}
/// Start tracking the arguments passed to the call \p CallI.
void addCallArgsForwardingRegs(const MachineInstr *CallI,
CallSiteInfoImpl &&CallInfo) {
assert(CallI->isCandidateForCallSiteEntry());
bool Inserted =
CallSitesInfo.try_emplace(CallI, std::move(CallInfo)).second;
(void)Inserted;
assert(Inserted && "Call site info not unique");
}
const CallSiteInfoMap &getCallSitesInfo() const {
return CallSitesInfo;
}
/// The following functions update call site info. They should be called
/// before removing, replacing, or copying a call instruction.
/// Erase the call site info for \p MI. It is used to remove a call
/// instruction from the instruction stream.
void eraseCallSiteInfo(const MachineInstr *MI);
/// Copy the call site info from \p Old to \p New. This is used when making a
/// copy of an instruction that will be inserted at a different point of the
/// instruction stream.
void copyCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
const std::vector<char> &getBBSectionsSymbolPrefix() const {
return BBSectionsSymbolPrefix;
}
/// Move the call site info from \p Old to \p New. This function is used when
/// replacing one call instruction with another one to the same callee.
void moveCallSiteInfo(const MachineInstr *Old,
const MachineInstr *New);
unsigned getNewDebugInstrNum() {
return ++DebugInstrNumberingCount;
}
};
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
// Provide specializations of GraphTraits to be able to treat a
// machine function as a graph of machine basic blocks... these are
// the same as the machine basic block iterators, except that the root
// node is implicitly the first node of the function.
//
template <> struct GraphTraits<MachineFunction*> :
public GraphTraits<MachineBasicBlock*> {
static NodeRef getEntryNode(MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
using nodes_iterator = pointer_iterator<MachineFunction::iterator>;
static nodes_iterator nodes_begin(MachineFunction *F) {
return nodes_iterator(F->begin());
}
static nodes_iterator nodes_end(MachineFunction *F) {
return nodes_iterator(F->end());
}
static unsigned size (MachineFunction *F) { return F->size(); }
};
template <> struct GraphTraits<const MachineFunction*> :
public GraphTraits<const MachineBasicBlock*> {
static NodeRef getEntryNode(const MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
using nodes_iterator = pointer_iterator<MachineFunction::const_iterator>;
static nodes_iterator nodes_begin(const MachineFunction *F) {
return nodes_iterator(F->begin());
}
static nodes_iterator nodes_end (const MachineFunction *F) {
return nodes_iterator(F->end());
}
static unsigned size (const MachineFunction *F) {
return F->size();
}
};
// Provide specializations of GraphTraits to be able to treat a function as a
// graph of basic blocks... and to walk it in inverse order. Inverse order for
// a function is considered to be when traversing the predecessor edges of a BB
// instead of the successor edges.
//
template <> struct GraphTraits<Inverse<MachineFunction*>> :
public GraphTraits<Inverse<MachineBasicBlock*>> {
static NodeRef getEntryNode(Inverse<MachineFunction *> G) {
return &G.Graph->front();
}
};
template <> struct GraphTraits<Inverse<const MachineFunction*>> :
public GraphTraits<Inverse<const MachineBasicBlock*>> {
static NodeRef getEntryNode(Inverse<const MachineFunction *> G) {
return &G.Graph->front();
}
};
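// With these specializations in place, the generic graph algorithms apply
// directly to machine functions. A minimal sketch (assumes
// "llvm/ADT/DepthFirstIterator.h" is included and MachineFunction &MF exists):
//
//   for (MachineBasicBlock *MBB : depth_first(&MF))
//     (void)MBB; // blocks visited in depth-first order from the entry block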
class MachineFunctionAnalysisManager;
void verifyMachineFunction(MachineFunctionAnalysisManager *,
const std::string &Banner,
const MachineFunction &MF);
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEFUNCTION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Function.h b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
index e0094e2afff2..c33e8e94b467 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Function.h
@@ -1,971 +1,972 @@
//===- llvm/Function.h - Class to represent a single function ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the Function class, which represents a
// single function/procedure in LLVM.
//
// A function basically consists of a list of basic blocks, a list of arguments,
// and a symbol table.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_FUNCTION_H
#define LLVM_IR_FUNCTION_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
namespace llvm {
namespace Intrinsic {
typedef unsigned ID;
}
class AssemblyAnnotationWriter;
class Constant;
class DISubprogram;
class LLVMContext;
class Module;
template <typename T> class Optional;
class raw_ostream;
class Type;
class User;
class BranchProbabilityInfo;
class BlockFrequencyInfo;
-class Function : public GlobalObject, public ilist_node<Function> {
+class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
+ public ilist_node<Function> {
public:
using BasicBlockListType = SymbolTableList<BasicBlock>;
// BasicBlock iterators...
using iterator = BasicBlockListType::iterator;
using const_iterator = BasicBlockListType::const_iterator;
using arg_iterator = Argument *;
using const_arg_iterator = const Argument *;
private:
// Important things that make up a function!
BasicBlockListType BasicBlocks; ///< The basic blocks
mutable Argument *Arguments = nullptr; ///< The formal arguments
size_t NumArgs;
std::unique_ptr<ValueSymbolTable>
SymTab; ///< Symbol table of args/instructions
AttributeList AttributeSets; ///< Parameter attributes
/*
* Value::SubclassData
*
* bit 0 : HasLazyArguments
* bit 1 : HasPrefixData
* bit 2 : HasPrologueData
* bit 3 : HasPersonalityFn
* bits 4-13 : CallingConvention
* bits 14 : HasGC
* bits 15 : [reserved]
*/
/// Bits from GlobalObject::GlobalObjectSubclassData.
enum {
/// Whether this function is materializable.
IsMaterializableBit = 0,
};
friend class SymbolTableListTraits<Function>;
/// hasLazyArguments/CheckLazyArguments - The argument list of a function is
/// built on demand, so that the list isn't allocated until the first client
/// needs it. The hasLazyArguments predicate returns true if the arg list
/// hasn't been set up yet.
public:
bool hasLazyArguments() const {
return getSubclassDataFromValue() & (1<<0);
}
private:
void CheckLazyArguments() const {
if (hasLazyArguments())
BuildLazyArguments();
}
void BuildLazyArguments() const;
void clearArguments();
/// Function ctor - If the (optional) Module argument is specified, the
/// function is automatically inserted into the end of the function list for
/// the module.
///
Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
const Twine &N = "", Module *M = nullptr);
public:
Function(const Function&) = delete;
void operator=(const Function&) = delete;
~Function();
// This is here to help easily convert from FunctionT * (Function * or
// MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling
// FunctionT->getFunction().
const Function &getFunction() const { return *this; }
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
unsigned AddrSpace, const Twine &N = "",
Module *M = nullptr) {
return new Function(Ty, Linkage, AddrSpace, N, M);
}
// TODO: remove this once all users have been updated to pass an AddrSpace
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
return new Function(Ty, Linkage, static_cast<unsigned>(-1), N, M);
}
/// Creates a new function and attaches it to a module.
///
/// Places the function in the program address space as specified
/// by the module's data layout.
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N, Module &M);
/// Creates a function with some attributes recorded in llvm.module.flags
/// applied.
///
/// Use this when synthesizing new functions that need attributes that would
/// have been set by command line options.
static Function *createWithDefaultAttr(FunctionType *Ty, LinkageTypes Linkage,
unsigned AddrSpace,
const Twine &N = "",
Module *M = nullptr);
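/// A minimal creation sketch (assuming an existing Module &M and its
/// LLVMContext &Ctx):
/// \code
///   FunctionType *FT =
///       FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
///   Function *F = Function::Create(FT, Function::ExternalLinkage, "callee", M);
/// \endcode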
// Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Returns the number of non-debug IR instructions in this function.
/// This is equivalent to the sum of the sizes of each basic block contained
/// within this function.
unsigned getInstructionCount() const;
/// Returns the FunctionType for me.
FunctionType *getFunctionType() const {
return cast<FunctionType>(getValueType());
}
/// Returns the type of the ret val.
Type *getReturnType() const { return getFunctionType()->getReturnType(); }
/// getContext - Return a reference to the LLVMContext associated with this
/// function.
LLVMContext &getContext() const;
/// isVarArg - Return true if this function takes a variable number of
/// arguments.
bool isVarArg() const { return getFunctionType()->isVarArg(); }
bool isMaterializable() const {
return getGlobalObjectSubClassData() & (1 << IsMaterializableBit);
}
void setIsMaterializable(bool V) {
unsigned Mask = 1 << IsMaterializableBit;
setGlobalObjectSubClassData((~Mask & getGlobalObjectSubClassData()) |
(V ? Mask : 0u));
}
/// getIntrinsicID - This method returns the ID number of the specified
/// function, or Intrinsic::not_intrinsic if the function is not an
/// intrinsic, or if the pointer is null. This value is always defined to be
/// zero to allow easy checking for whether a function is intrinsic or not.
/// The particular intrinsic functions which correspond to this value are
/// defined in llvm/Intrinsics.h.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY { return IntID; }
/// isIntrinsic - Returns true if the function's name starts with "llvm.".
/// It's possible for this function to return true while getIntrinsicID()
/// returns Intrinsic::not_intrinsic!
bool isIntrinsic() const { return HasLLVMReservedName; }
/// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a
/// certain target. If it is a generic intrinsic false is returned.
static bool isTargetIntrinsic(Intrinsic::ID IID);
/// isTargetIntrinsic - Returns true if this function is an intrinsic and the
/// intrinsic is specific to a certain target. If this is not an intrinsic
/// or a generic intrinsic, false is returned.
bool isTargetIntrinsic() const;
/// Returns true if the function is one of the "Constrained Floating-Point
/// Intrinsics". Returns false if not, and returns false when
/// getIntrinsicID() returns Intrinsic::not_intrinsic.
bool isConstrainedFPIntrinsic() const;
static Intrinsic::ID lookupIntrinsicID(StringRef Name);
/// Recalculate the ID for this function if it is an Intrinsic defined
/// in llvm/Intrinsics.h. Sets the intrinsic ID to Intrinsic::not_intrinsic
/// if the name of this function does not match an intrinsic in that header.
/// Note, this method does not need to be called directly, as it is called
/// from Value::setName() whenever the name of this function changes.
void recalculateIntrinsicID();
/// getCallingConv()/setCallingConv(CC) - These methods get and set the
/// calling convention of this function. The enum values for the known
/// calling conventions are defined in CallingConv.h.
CallingConv::ID getCallingConv() const {
return static_cast<CallingConv::ID>((getSubclassDataFromValue() >> 4) &
CallingConv::MaxID);
}
void setCallingConv(CallingConv::ID CC) {
auto ID = static_cast<unsigned>(CC);
assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
}
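/// For example (sketch, assuming an existing Function *F):
/// \code
///   F->setCallingConv(CallingConv::Fast);
///   assert(F->getCallingConv() == CallingConv::Fast);
/// \endcode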
/// Return the attribute list for this Function.
AttributeList getAttributes() const { return AttributeSets; }
/// Set the attribute list for this Function.
void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
/// Add function attributes to this function.
void addFnAttr(Attribute::AttrKind Kind) {
addAttribute(AttributeList::FunctionIndex, Kind);
}
/// Add function attributes to this function.
void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
addAttribute(AttributeList::FunctionIndex,
Attribute::get(getContext(), Kind, Val));
}
/// Add function attributes to this function.
void addFnAttr(Attribute Attr) {
addAttribute(AttributeList::FunctionIndex, Attr);
}
/// Remove function attributes from this function.
void removeFnAttr(Attribute::AttrKind Kind) {
removeAttribute(AttributeList::FunctionIndex, Kind);
}
/// Remove function attribute from this function.
void removeFnAttr(StringRef Kind) {
setAttributes(getAttributes().removeAttribute(
getContext(), AttributeList::FunctionIndex, Kind));
}
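/// A sketch of adding, querying, and removing function attributes (assuming an
/// existing Function *F):
/// \code
///   F->addFnAttr(Attribute::NoInline);
///   F->addFnAttr("frame-pointer", "all");
///   if (F->hasFnAttribute(Attribute::NoInline))
///     F->removeFnAttr(Attribute::NoInline);
/// \endcode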
/// A function will have the "coroutine.presplit" attribute if it's
/// a coroutine and has not yet gone through the full CoroSplit pass.
bool isPresplitCoroutine() const {
return hasFnAttribute("coroutine.presplit");
}
enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
/// Class to represent profile counts.
///
/// This class represents both real and synthetic profile counts.
class ProfileCount {
private:
uint64_t Count;
ProfileCountType PCT;
static ProfileCount Invalid;
public:
ProfileCount() : Count(-1), PCT(PCT_Invalid) {}
ProfileCount(uint64_t Count, ProfileCountType PCT)
: Count(Count), PCT(PCT) {}
bool hasValue() const { return PCT != PCT_Invalid; }
uint64_t getCount() const { return Count; }
ProfileCountType getType() const { return PCT; }
bool isSynthetic() const { return PCT == PCT_Synthetic; }
explicit operator bool() { return hasValue(); }
bool operator!() const { return !hasValue(); }
// Update the count retaining the same profile count type.
ProfileCount &setCount(uint64_t C) {
Count = C;
return *this;
}
static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); }
};
/// Set the entry count for this function.
///
/// Entry count is the number of times this function was executed based on
/// pgo data. \p Imports points to a set of GUIDs that need to
/// be imported by the function for sample PGO, to enable the same inlines as
/// the profiled optimized binary.
void setEntryCount(ProfileCount Count,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// A convenience wrapper for setting entry count
void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real,
const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// Get the entry count for this function.
///
/// Entry count is the number of times the function was executed.
/// When AllowSynthetic is false, only pgo_data will be returned.
ProfileCount getEntryCount(bool AllowSynthetic = false) const;
/// Return true if the function is annotated with profile data.
///
/// Presence of entry counts from a profile run implies the function has
/// profile annotations. If IncludeSynthetic is false, only return true
/// when the profile data is real.
bool hasProfileData(bool IncludeSynthetic = false) const {
return getEntryCount(IncludeSynthetic).hasValue();
}
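/// For example (sketch, assuming an existing Function *F):
/// \code
///   F->setEntryCount(100, Function::PCT_Real);
///   if (F->hasProfileData()) {
///     uint64_t Count = F->getEntryCount().getCount();
///     (void)Count;
///   }
/// \endcode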
/// Returns the set of GUIDs that need to be imported to the function for
/// sample PGO, to enable the same inlines as the profiled optimized binary.
DenseSet<GlobalValue::GUID> getImportGUIDs() const;
/// Set the section prefix for this function.
void setSectionPrefix(StringRef Prefix);
/// Get the section prefix for this function.
Optional<StringRef> getSectionPrefix() const;
/// Return true if the function has the attribute.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
/// Return true if the function has the attribute.
bool hasFnAttribute(StringRef Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
/// Return the attribute for the given attribute kind.
Attribute getFnAttribute(Attribute::AttrKind Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
/// Return the attribute for the given attribute kind.
Attribute getFnAttribute(StringRef Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
/// Return the stack alignment for the function.
unsigned getFnStackAlignment() const {
if (!hasFnAttribute(Attribute::StackAlignment))
return 0;
if (const auto MA =
AttributeSets.getStackAlignment(AttributeList::FunctionIndex))
return MA->value();
return 0;
}
/// Return the stack alignment for the function.
MaybeAlign getFnStackAlign() const {
if (!hasFnAttribute(Attribute::StackAlignment))
return None;
return AttributeSets.getStackAlignment(AttributeList::FunctionIndex);
}
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
bool hasGC() const {
return getSubclassDataFromValue() & (1<<14);
}
const std::string &getGC() const;
void setGC(std::string Str);
void clearGC();
/// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
bool hasStackProtectorFnAttr() const;
/// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute::AttrKind Kind);
/// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute Attr);
/// adds the attributes to the list of attributes.
void addAttributes(unsigned i, const AttrBuilder &Attrs);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute Attr);
/// adds the attributes to the list of attributes for the given arg.
void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, StringRef Kind);
/// removes the attributes from the list of attributes.
void removeAttributes(unsigned i, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, StringRef Kind);
/// removes the attribute from the list of attributes.
void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes, from the list of attributes, noundef and other attributes that
/// imply undefined behavior if an `undef` or `poison` value is passed.
void removeParamUndefImplyingAttrs(unsigned ArgNo);
/// check if an attribute is in the list of attributes.
bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
return getAttributes().hasAttribute(i, Kind);
}
/// check if an attribute is in the list of attributes.
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
return getAttributes().hasParamAttribute(ArgNo, Kind);
}
/// gets the specified attribute from the list of attributes.
Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
return getAttributes().getParamAttr(ArgNo, Kind);
}
/// gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
/// gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, StringRef Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
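/// A sketch of working with parameter attributes (assuming an existing
/// Function *F with at least one argument):
/// \code
///   F->addParamAttr(0, Attribute::NoAlias);
///   assert(F->hasParamAttribute(0, Attribute::NoAlias));
/// \endcode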
/// adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
/// adds the dereferenceable attribute to the list of attributes for
/// the given arg.
void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
/// attributes.
void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
/// attributes for the given arg.
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
/// Extract the alignment for a call or parameter (0=unknown).
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
unsigned getParamAlignment(unsigned ArgNo) const {
if (const auto MA = getParamAlign(ArgNo))
return MA->value();
return 0;
}
MaybeAlign getParamAlign(unsigned ArgNo) const {
return AttributeSets.getParamAlignment(ArgNo);
}
MaybeAlign getParamStackAlign(unsigned ArgNo) const {
return AttributeSets.getParamStackAlignment(ArgNo);
}
/// Extract the byval type for a parameter.
Type *getParamByValType(unsigned ArgNo) const {
return AttributeSets.getParamByValType(ArgNo);
}
/// Extract the sret type for a parameter.
Type *getParamStructRetType(unsigned ArgNo) const {
return AttributeSets.getParamStructRetType(ArgNo);
}
/// Extract the inalloca type for a parameter.
Type *getParamInAllocaType(unsigned ArgNo) const {
return AttributeSets.getParamInAllocaType(ArgNo);
}
/// Extract the byref type for a parameter.
Type *getParamByRefType(unsigned ArgNo) const {
return AttributeSets.getParamByRefType(ArgNo);
}
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
/// @param i AttributeList index, referring to a return value or argument.
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeSets.getDereferenceableBytes(i);
}
/// Extract the number of dereferenceable bytes for a parameter.
/// @param ArgNo Index of an argument, with 0 being the first function arg.
uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
return AttributeSets.getParamDereferenceableBytes(ArgNo);
}
/// Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
/// @param i AttributeList index, referring to a return value or argument.
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeSets.getDereferenceableOrNullBytes(i);
}
/// Extract the number of dereferenceable_or_null bytes for a
/// parameter.
/// @param ArgNo AttributeList ArgNo, referring to an argument.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
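/// For example (sketch, assuming an existing Function *F whose first argument
/// is a pointer):
/// \code
///   F->addDereferenceableParamAttr(0, 8);
///   assert(F->getParamDereferenceableBytes(0) == 8);
/// \endcode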
/// Determine if the function does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttribute(Attribute::ReadNone);
}
void setDoesNotAccessMemory() {
addFnAttr(Attribute::ReadNone);
}
/// Determine if the function does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly);
}
void setOnlyReadsMemory() {
addFnAttr(Attribute::ReadOnly);
}
/// Determine if the function does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttribute(Attribute::WriteOnly);
}
void setDoesNotReadMemory() {
addFnAttr(Attribute::WriteOnly);
}
/// Determine if the call can access memory only using pointers based
/// on its arguments.
bool onlyAccessesArgMemory() const {
return hasFnAttribute(Attribute::ArgMemOnly);
}
void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
/// Determine if the function may only access memory that is
/// inaccessible from the IR.
bool onlyAccessesInaccessibleMemory() const {
return hasFnAttribute(Attribute::InaccessibleMemOnly);
}
void setOnlyAccessesInaccessibleMemory() {
addFnAttr(Attribute::InaccessibleMemOnly);
}
/// Determine if the function may only access memory that is
/// either inaccessible from the IR or pointed to by its arguments.
bool onlyAccessesInaccessibleMemOrArgMem() const {
return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
void setOnlyAccessesInaccessibleMemOrArgMem() {
addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
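/// A sketch of setting and querying the memory-access attributes (assuming an
/// existing Function *F):
/// \code
///   F->setOnlyReadsMemory();
///   assert(F->onlyReadsMemory());
/// \endcode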
/// Determine if the function cannot return.
bool doesNotReturn() const {
return hasFnAttribute(Attribute::NoReturn);
}
void setDoesNotReturn() {
addFnAttr(Attribute::NoReturn);
}
/// Determine if the function should not perform indirect branch tracking.
bool doesNoCfCheck() const { return hasFnAttribute(Attribute::NoCfCheck); }
/// Determine if the function cannot unwind.
bool doesNotThrow() const {
return hasFnAttribute(Attribute::NoUnwind);
}
void setDoesNotThrow() {
addFnAttr(Attribute::NoUnwind);
}
/// Determine if the call cannot be duplicated.
bool cannotDuplicate() const {
return hasFnAttribute(Attribute::NoDuplicate);
}
void setCannotDuplicate() {
addFnAttr(Attribute::NoDuplicate);
}
/// Determine if the call is convergent.
bool isConvergent() const {
return hasFnAttribute(Attribute::Convergent);
}
void setConvergent() {
addFnAttr(Attribute::Convergent);
}
void setNotConvergent() {
removeFnAttr(Attribute::Convergent);
}
/// Determine if the call is speculatable (free of side effects and safe to
/// execute speculatively).
bool isSpeculatable() const {
return hasFnAttribute(Attribute::Speculatable);
}
void setSpeculatable() {
addFnAttr(Attribute::Speculatable);
}
/// Determine if the call cannot deallocate (free) memory.
bool doesNotFreeMemory() const {
return onlyReadsMemory() || hasFnAttribute(Attribute::NoFree);
}
void setDoesNotFreeMemory() {
addFnAttr(Attribute::NoFree);
}
/// Determine if the call does not synchronize with other threads.
bool hasNoSync() const {
return hasFnAttribute(Attribute::NoSync);
}
void setNoSync() {
addFnAttr(Attribute::NoSync);
}
/// Determine if the function is known not to recurse, directly or
/// indirectly.
bool doesNotRecurse() const {
return hasFnAttribute(Attribute::NoRecurse);
}
void setDoesNotRecurse() {
addFnAttr(Attribute::NoRecurse);
}
/// Determine if the function is required to make forward progress.
bool mustProgress() const {
return hasFnAttribute(Attribute::MustProgress) ||
hasFnAttribute(Attribute::WillReturn);
}
void setMustProgress() { addFnAttr(Attribute::MustProgress); }
/// Determine if the function will return.
bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); }
void setWillReturn() { addFnAttr(Attribute::WillReturn); }
/// True if the ABI mandates (or the user requested) that this
/// function be in an unwind table.
bool hasUWTable() const {
return hasFnAttribute(Attribute::UWTable);
}
void setHasUWTable() {
addFnAttr(Attribute::UWTable);
}
/// True if this function needs an unwind table.
bool needsUnwindTableEntry() const {
return hasUWTable() || !doesNotThrow() || hasPersonalityFn();
}
/// Determine if the function returns a structure through first
/// or second pointer argument.
bool hasStructRetAttr() const {
return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
AttributeSets.hasParamAttribute(1, Attribute::StructRet);
}
/// Determine if the parameter or return value is marked with NoAlias
/// attribute.
bool returnDoesNotAlias() const {
return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
Attribute::NoAlias);
}
void setReturnDoesNotAlias() {
addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
}
/// Do not optimize this function (-O0).
bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
/// Optimize this function for minimum size (-Oz).
bool hasMinSize() const { return hasFnAttribute(Attribute::MinSize); }
/// Optimize this function for size (-Os) or minimum size (-Oz).
bool hasOptSize() const {
return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize();
}
/// Returns the denormal handling type for the default rounding mode of the
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a Function) from the Function Src to this one.
void copyAttributesFrom(const Function *Src);
/// deleteBody - This method deletes the body of the function, and converts
/// the linkage to external.
///
void deleteBody() {
dropAllReferences();
setLinkage(ExternalLinkage);
}
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
void removeFromParent();
/// eraseFromParent - This method unlinks 'this' from the containing module
/// and deletes it.
///
void eraseFromParent();
/// Steal arguments from another function.
///
/// Drop this function's arguments and splice in the ones from \c Src.
/// Requires that this has no function body.
void stealArgumentListFrom(Function &Src);
/// Get the underlying elements of the Function... the basic block list is
/// empty for external functions.
///
const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
static BasicBlockListType Function::*getSublistAccess(BasicBlock*) {
return &Function::BasicBlocks;
}
const BasicBlock &getEntryBlock() const { return front(); }
BasicBlock &getEntryBlock() { return front(); }
//===--------------------------------------------------------------------===//
// Symbol Table Accessing functions...
/// getSymbolTable() - Return the symbol table if any, otherwise nullptr.
///
inline ValueSymbolTable *getValueSymbolTable() { return SymTab.get(); }
inline const ValueSymbolTable *getValueSymbolTable() const {
return SymTab.get();
}
//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
//
iterator begin() { return BasicBlocks.begin(); }
const_iterator begin() const { return BasicBlocks.begin(); }
iterator end () { return BasicBlocks.end(); }
const_iterator end () const { return BasicBlocks.end(); }
size_t size() const { return BasicBlocks.size(); }
bool empty() const { return BasicBlocks.empty(); }
const BasicBlock &front() const { return BasicBlocks.front(); }
BasicBlock &front() { return BasicBlocks.front(); }
const BasicBlock &back() const { return BasicBlocks.back(); }
BasicBlock &back() { return BasicBlocks.back(); }
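/// A typical traversal over the blocks and instructions of a function body
/// (sketch, assuming an existing Function &F that is not a declaration):
/// \code
///   for (BasicBlock &BB : F)
///     for (Instruction &I : BB)
///       (void)I; // inspect or transform I here
/// \endcode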
/// @name Function Argument Iteration
/// @{
arg_iterator arg_begin() {
CheckLazyArguments();
return Arguments;
}
const_arg_iterator arg_begin() const {
CheckLazyArguments();
return Arguments;
}
arg_iterator arg_end() {
CheckLazyArguments();
return Arguments + NumArgs;
}
const_arg_iterator arg_end() const {
CheckLazyArguments();
return Arguments + NumArgs;
}
Argument* getArg(unsigned i) const {
assert (i < NumArgs && "getArg() out of range!");
CheckLazyArguments();
return Arguments + i;
}
iterator_range<arg_iterator> args() {
return make_range(arg_begin(), arg_end());
}
iterator_range<const_arg_iterator> args() const {
return make_range(arg_begin(), arg_end());
}
/// @}
size_t arg_size() const { return NumArgs; }
bool arg_empty() const { return arg_size() == 0; }
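/// For example, counting pointer arguments (sketch, assuming an existing
/// Function &F):
/// \code
///   unsigned NumPtrArgs = 0;
///   for (const Argument &A : F.args())
///     if (A.getType()->isPointerTy())
///       ++NumPtrArgs;
/// \endcode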
/// Check whether this function has a personality function.
bool hasPersonalityFn() const {
return getSubclassDataFromValue() & (1<<3);
}
/// Get the personality function associated with this function.
Constant *getPersonalityFn() const;
void setPersonalityFn(Constant *Fn);
/// Check whether this function has prefix data.
bool hasPrefixData() const {
return getSubclassDataFromValue() & (1<<1);
}
/// Get the prefix data associated with this function.
Constant *getPrefixData() const;
void setPrefixData(Constant *PrefixData);
/// Check whether this function has prologue data.
bool hasPrologueData() const {
return getSubclassDataFromValue() & (1<<2);
}
/// Get the prologue data associated with this function.
Constant *getPrologueData() const;
void setPrologueData(Constant *PrologueData);
/// Print the function to an output stream with an optional
/// AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
bool ShouldPreserveUseListOrder = false,
bool IsForDebug = false) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function with the code for each
/// basic block inside. This depends on there being a 'dot' and 'gv' program
/// in your path.
///
void viewCFG() const;
/// Extended form to print edge weights.
void viewCFG(bool ViewCFGOnly, const BlockFrequencyInfo *BFI,
const BranchProbabilityInfo *BPI) const;
/// viewCFGOnly - This function is meant for use from the debugger. It works
/// just like viewCFG, but it does not include the contents of basic blocks
/// into the nodes, just the label. If you are only interested in the CFG
/// this can make the graph smaller.
///
void viewCFGOnly() const;
/// Extended form to print edge weights.
void viewCFGOnly(const BlockFrequencyInfo *BFI,
const BranchProbabilityInfo *BPI) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal;
}
/// dropAllReferences() - This method causes all the subinstructions to "let
/// go" of all references that they are maintaining. This allows one to
/// 'delete' a whole module at a time, even though there may be circular
/// references... first all references are dropped, and all use counts go to
/// zero. Then everything is deleted for real. Note that no operations are
/// valid on an object that has "dropped all references", except operator
/// delete.
///
/// Since no other object in the module can have references into the body of a
/// function, dropping all references deletes the entire body of the function,
/// including any contained basic blocks.
///
void dropAllReferences();
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it, or blockaddress expressions.
/// Optionally passes back an offending user for diagnostic purposes,
/// ignores callback uses, assume-like pointer annotation calls, and
/// references in llvm.used and llvm.compiler.used variables.
///
bool hasAddressTaken(const User ** = nullptr,
bool IgnoreCallbackUses = false,
bool IgnoreAssumeLikeCalls = true,
bool IgnoreLLVMUsed = false) const;
/// isDefTriviallyDead - Return true if it is trivially safe to remove
/// this function definition from the module (because it isn't externally
/// visible, does not have its address taken, and has no callers). To make
/// this more accurate, call removeDeadConstantUsers first.
bool isDefTriviallyDead() const;
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool callsFunctionThatReturnsTwice() const;
/// Set the attached subprogram.
///
/// Calls \a setMetadata() with \a LLVMContext::MD_dbg.
void setSubprogram(DISubprogram *SP);
/// Get the attached subprogram.
///
/// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result
/// to \a DISubprogram.
DISubprogram *getSubprogram() const;
/// Returns true if we should emit debug info for profiling.
bool isDebugInfoForProfiling() const;
/// Check if null pointer dereferencing is considered undefined behavior for
/// the function.
/// Return value: false => null pointer dereference is undefined.
/// Return value: true => null pointer dereference is not undefined.
bool nullPointerIsDefined() const;
private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);
/// Shadow Value::setValueSubclassData with a private forwarding method so
/// that subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
Value::setValueSubclassData(D);
}
void setValueSubclassDataBit(unsigned Bit, bool On);
};
/// Check whether null pointer dereferencing is considered undefined behavior
/// for a given function or an address space.
/// Null pointer access in non-zero address space is not considered undefined.
/// Return value: false => null pointer dereference is undefined.
/// Return value: true => null pointer dereference is not undefined.
bool NullPointerIsDefined(const Function *F, unsigned AS = 0);
template <>
struct OperandTraits<Function> : public HungoffOperandTraits<3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
} // end namespace llvm
#endif // LLVM_IR_FUNCTION_H
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Module.h b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
index 97aea5aedf22..bd3a196c7181 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm-project/llvm/include/llvm/IR/Module.h
@@ -1,964 +1,964 @@
//===- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// @file
/// Module.h This file contains the declarations for the Module class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_MODULE_H
#define LLVM_IR_MODULE_H
#include "llvm-c/Types.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/CodeGen.h"
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
namespace llvm {
class Error;
class FunctionType;
class GVMaterializer;
class LLVMContext;
class MemoryBuffer;
class ModuleSummaryIndex;
class Pass;
class RandomNumberGenerator;
template <class PtrType> class SmallPtrSetImpl;
class StructType;
class VersionTuple;
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
/// Intermediate Representation (IR) objects. Each module directly contains a
/// list of global variables, a list of functions, a list of libraries (or
/// other modules) this module depends on, a symbol table, and various data
/// about the target's characteristics.
///
/// A module maintains a GlobalValRefMap object that is used to hold all
/// constant references to global variables in the module. When a global
/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
/// The main container class for the LLVM Intermediate Representation.
-class Module {
-/// @name Types And Enumerations
-/// @{
+class LLVM_EXTERNAL_VISIBILITY Module {
+ /// @name Types And Enumerations
+ /// @{
public:
/// The type for the list of global variables.
using GlobalListType = SymbolTableList<GlobalVariable>;
/// The type for the list of functions.
using FunctionListType = SymbolTableList<Function>;
/// The type for the list of aliases.
using AliasListType = SymbolTableList<GlobalAlias>;
/// The type for the list of ifuncs.
using IFuncListType = SymbolTableList<GlobalIFunc>;
/// The type for the list of named metadata.
using NamedMDListType = ilist<NamedMDNode>;
/// The type of the comdat "symbol" table.
using ComdatSymTabType = StringMap<Comdat>;
/// The type for mapping names to named metadata.
using NamedMDSymTabType = StringMap<NamedMDNode *>;
/// The Global Variable iterator.
using global_iterator = GlobalListType::iterator;
/// The Global Variable constant iterator.
using const_global_iterator = GlobalListType::const_iterator;
/// The Function iterators.
using iterator = FunctionListType::iterator;
/// The Function constant iterator
using const_iterator = FunctionListType::const_iterator;
/// The Function reverse iterator.
using reverse_iterator = FunctionListType::reverse_iterator;
/// The Function constant reverse iterator.
using const_reverse_iterator = FunctionListType::const_reverse_iterator;
/// The Global Alias iterators.
using alias_iterator = AliasListType::iterator;
/// The Global Alias constant iterator
using const_alias_iterator = AliasListType::const_iterator;
/// The Global IFunc iterators.
using ifunc_iterator = IFuncListType::iterator;
/// The Global IFunc constant iterator
using const_ifunc_iterator = IFuncListType::const_iterator;
/// The named metadata iterators.
using named_metadata_iterator = NamedMDListType::iterator;
/// The named metadata constant iterators.
using const_named_metadata_iterator = NamedMDListType::const_iterator;
/// This enumeration defines the supported behaviors of module flags.
enum ModFlagBehavior {
/// Emits an error if two values disagree, otherwise the resulting value is
/// that of the operands.
Error = 1,
/// Emits a warning if two values disagree. The result value will be the
/// operand for the flag from the first module being linked.
Warning = 2,
/// Adds a requirement that another module flag be present and have a
/// specified value after linking is performed. The value must be a metadata
/// pair, where the first element of the pair is the ID of the module flag
/// to be restricted, and the second element of the pair is the value the
/// module flag should be restricted to. This behavior can be used to
/// restrict the allowable results (via triggering of an error) of linking
/// IDs with the **Override** behavior.
Require = 3,
/// Uses the specified value, regardless of the behavior or value of the
/// other module. If both modules specify **Override**, but the values
/// differ, an error will be emitted.
Override = 4,
/// Appends the two values, which are required to be metadata nodes.
Append = 5,
/// Appends the two values, which are required to be metadata
/// nodes. However, duplicate entries in the second list are dropped
/// during the append operation.
AppendUnique = 6,
/// Takes the max of the two values, which are required to be integers.
Max = 7,
// Markers:
ModFlagBehaviorFirstVal = Error,
ModFlagBehaviorLastVal = Max
};
/// Checks if Metadata represents a valid ModFlagBehavior, and stores the
/// converted result in MFB.
static bool isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB);
/// Check if the given module flag metadata represents a valid module flag,
/// and store the flag behavior, the key string and the value metadata.
static bool isValidModuleFlag(const MDNode &ModFlag, ModFlagBehavior &MFB,
MDString *&Key, Metadata *&Val);
struct ModuleFlagEntry {
ModFlagBehavior Behavior;
MDString *Key;
Metadata *Val;
ModuleFlagEntry(ModFlagBehavior B, MDString *K, Metadata *V)
: Behavior(B), Key(K), Val(V) {}
};
/// @}
/// @name Member Variables
/// @{
private:
LLVMContext &Context; ///< The LLVMContext from which types and
///< constants are allocated.
GlobalListType GlobalList; ///< The Global Variables in the module
FunctionListType FunctionList; ///< The Functions in the module
AliasListType AliasList; ///< The Aliases in the module
IFuncListType IFuncList; ///< The IFuncs in the module
NamedMDListType NamedMDList; ///< The named metadata in the module
std::string GlobalScopeAsm; ///< Inline Asm at global scope.
std::unique_ptr<ValueSymbolTable> ValSymTab; ///< Symbol table for values
ComdatSymTabType ComdatSymTab; ///< Symbol table for COMDATs
std::unique_ptr<MemoryBuffer>
OwnedMemoryBuffer; ///< Memory buffer directly owned by this
///< module, for legacy clients only.
std::unique_ptr<GVMaterializer>
Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
std::string SourceFileName; ///< Original source file name for module,
///< recorded in bitcode.
std::string TargetTriple; ///< Platform target triple Module compiled on
///< Format: (arch)(sub)-(vendor)-(sys)-(abi)
NamedMDSymTabType NamedMDSymTab; ///< NamedMDNode names.
DataLayout DL; ///< DataLayout associated with the module
StringMap<unsigned>
CurrentIntrinsicIds; ///< Keep track of the current unique id count for
///< the specified intrinsic basename.
DenseMap<std::pair<Intrinsic::ID, const FunctionType *>, unsigned>
UniquedIntrinsicNames; ///< Keep track of uniqued names of intrinsics
///< based on unnamed types. The combination of
///< ID and FunctionType maps to the extension that
///< is used to make the intrinsic name unique.
friend class Constant;
/// @}
/// @name Constructors
/// @{
public:
/// The Module constructor. Note that there is no default constructor. You
/// must provide a name for the module upon construction.
explicit Module(StringRef ModuleID, LLVMContext& C);
/// The module destructor. This will dropAllReferences.
~Module();
/// @}
/// @name Module Level Accessors
/// @{
/// Get the module identifier which is, essentially, the name of the module.
/// @returns the module identifier as a string
const std::string &getModuleIdentifier() const { return ModuleID; }
/// Returns the number of non-debug IR instructions in the module.
/// This is equivalent to the sum of the IR instruction counts of each
/// function contained in the module.
unsigned getInstructionCount() const;
/// Get the module's original source file name. When compiling from
/// bitcode, this is taken from a bitcode record where it was recorded.
/// For other compiles it is the same as the ModuleID, which would
/// contain the source file name.
const std::string &getSourceFileName() const { return SourceFileName; }
/// Get a short "name" for the module.
///
/// This is useful for debugging or logging. It is essentially a convenience
/// wrapper around getModuleIdentifier().
StringRef getName() const { return ModuleID; }
/// Get the data layout string for the module's target platform. This is
/// equivalent to getDataLayout().getStringRepresentation().
const std::string &getDataLayoutStr() const {
return DL.getStringRepresentation();
}
/// Get the data layout for the module's target platform.
const DataLayout &getDataLayout() const;
/// Get the target triple which is a string describing the target host.
/// @returns a string containing the target triple.
const std::string &getTargetTriple() const { return TargetTriple; }
/// Get the global data context.
/// @returns LLVMContext - a container for LLVM's global information
LLVMContext &getContext() const { return Context; }
/// Get any module-scope inline assembly blocks.
/// @returns a string containing the module-scope inline assembly blocks.
const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; }
/// Get a RandomNumberGenerator salted for use with this module. The
/// RNG can be seeded via -rng-seed=<uint64> and is salted with the
/// ModuleID and the provided pass salt. The returned RNG should not
/// be shared across threads or passes.
///
/// A unique RNG per pass ensures a reproducible random stream even
/// when other randomness consuming passes are added or removed. In
/// addition, the random stream will be reproducible across LLVM
/// versions when the pass does not change.
std::unique_ptr<RandomNumberGenerator> createRNG(const StringRef Name) const;
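// Illustrative sketch (hypothetical usage; "MyPass" is an assumed pass name):
// a per-pass, reproducible random stream salted with the module identifier.
//   std::unique_ptr<RandomNumberGenerator> RNG = M.createRNG("MyPass");
//   uint64_t Bits = (*RNG)(); // RandomNumberGenerator is a uniform bit generator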
/// Return true if size-info optimization remark is enabled, false
/// otherwise.
bool shouldEmitInstrCountChangedRemark() {
return getContext().getDiagHandlerPtr()->isAnalysisRemarkEnabled(
"size-info");
}
/// @}
/// @name Module Level Mutators
/// @{
/// Set the module identifier.
void setModuleIdentifier(StringRef ID) { ModuleID = std::string(ID); }
/// Set the module's original source file name.
void setSourceFileName(StringRef Name) { SourceFileName = std::string(Name); }
/// Set the data layout
void setDataLayout(StringRef Desc);
void setDataLayout(const DataLayout &Other);
/// Set the target triple.
void setTargetTriple(StringRef T) { TargetTriple = std::string(T); }
/// Set the module-scope inline assembly blocks.
/// A trailing newline is added if the input doesn't have one.
void setModuleInlineAsm(StringRef Asm) {
GlobalScopeAsm = std::string(Asm);
if (!GlobalScopeAsm.empty() && GlobalScopeAsm.back() != '\n')
GlobalScopeAsm += '\n';
}
/// Append to the module-scope inline assembly blocks.
/// A trailing newline is added if the input doesn't have one.
void appendModuleInlineAsm(StringRef Asm) {
GlobalScopeAsm += Asm;
if (!GlobalScopeAsm.empty() && GlobalScopeAsm.back() != '\n')
GlobalScopeAsm += '\n';
}
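// Illustrative sketch (hypothetical symbol names): both calls get a trailing
// '\n' appended, so successive appends stay line-separated in the final
// module-level asm string.
//   M.setModuleInlineAsm(".globl my_sym");
//   M.appendModuleInlineAsm(".set my_alias, my_sym");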
/// @}
/// @name Generic Value Accessors
/// @{
/// Return the global value in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
/// Return the number of global values in the module.
unsigned getNumNamedValues() const;
/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;
/// Populate the client-supplied SmallVector with the names of the custom
/// metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
/// Populate client supplied SmallVector with the bundle tags registered in
/// this LLVMContext. The bundle tags are ordered by increasing bundle IDs.
/// \see LLVMContext::getOperandBundleTagID
void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const;
std::vector<StructType *> getIdentifiedStructTypes() const;
/// Return a unique name for an intrinsic whose mangling is based on an
/// unnamed type. The Proto represents the function prototype.
std::string getUniqueIntrinsicName(StringRef BaseName, Intrinsic::ID Id,
const FunctionType *Proto);
/// @}
/// @name Function Accessors
/// @{
/// Look up the specified function in the module symbol table. Three
/// possibilities:
/// 1. If it does not exist, add a prototype for the function and return it.
/// 2. Otherwise, if the existing function has the correct prototype, return
/// the existing function.
/// 3. Finally, if the function exists but has the wrong prototype, return the
/// function with a constantexpr cast to the right prototype.
///
/// In all cases, the returned value is a FunctionCallee wrapper around the
/// 'FunctionType *T' passed in, as well as a 'Value *' that is either the
/// Function or a bitcast of it to the requested type.
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
AttributeList AttributeList);
FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T);
/// Look up the specified function in the module symbol table. If it does not
/// exist, add a prototype for the function and return it. This overload is
/// guaranteed to return a constant pointer to the specified function type, or
/// a ConstantExpr BitCast to that type if the named function has a different
/// type. This version of the method takes a list of function argument types,
/// which makes it easier for clients to use.
template <typename... ArgsTy>
FunctionCallee getOrInsertFunction(StringRef Name,
AttributeList AttributeList, Type *RetTy,
ArgsTy... Args) {
SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
return getOrInsertFunction(Name,
FunctionType::get(RetTy, ArgTys, false),
AttributeList);
}
/// Same as above, but without the attributes.
template <typename... ArgsTy>
FunctionCallee getOrInsertFunction(StringRef Name, Type *RetTy,
ArgsTy... Args) {
return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
}
// Avoid an incorrect ordering that'd otherwise compile incorrectly.
template <typename... ArgsTy>
FunctionCallee
getOrInsertFunction(StringRef Name, AttributeList AttributeList,
FunctionType *Invalid, ArgsTy... Args) = delete;
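// Illustrative sketch (hypothetical usage; M is an assumed Module): declare or
// reuse an i32(i8*) prototype and inspect the resulting FunctionCallee.
//   LLVMContext &Ctx = M.getContext();
//   FunctionCallee Puts = M.getOrInsertFunction(
//       "puts", Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx));
//   // Puts.getFunctionType() is the i32(i8*) type built above;
//   // Puts.getCallee() is the existing Function or a bitcast of it.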
/// Look up the specified function in the module symbol table. If it does not
/// exist, return null.
Function *getFunction(StringRef Name) const;
/// @}
/// @name Global Variable Accessors
/// @{
/// Look up the specified global variable in the module symbol table. If it
/// does not exist, return null. If AllowInternal is set to true, this
/// function will also return globals that have InternalLinkage. By default,
/// such globals are not returned.
GlobalVariable *getGlobalVariable(StringRef Name) const {
return getGlobalVariable(Name, false);
}
GlobalVariable *getGlobalVariable(StringRef Name, bool AllowInternal) const;
GlobalVariable *getGlobalVariable(StringRef Name,
bool AllowInternal = false) {
return static_cast<const Module *>(this)->getGlobalVariable(Name,
AllowInternal);
}
/// Return the global variable in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
const GlobalVariable *getNamedGlobal(StringRef Name) const {
return getGlobalVariable(Name, true);
}
GlobalVariable *getNamedGlobal(StringRef Name) {
return const_cast<GlobalVariable *>(
static_cast<const Module *>(this)->getNamedGlobal(Name));
}
/// Look up the specified global in the module symbol table.
/// If it does not exist, invoke a callback to create a declaration of the
/// global and return it. The global is constantexpr casted to the expected
/// type if necessary.
Constant *
getOrInsertGlobal(StringRef Name, Type *Ty,
function_ref<GlobalVariable *()> CreateGlobalCallback);
/// Look up the specified global in the module symbol table. If required, this
/// overload constructs the global variable using its constructor's defaults.
Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
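// Illustrative sketch (hypothetical usage; M is an assumed Module): create the
// global "counter" lazily with a specific linkage and initializer.
//   Type *I64 = Type::getInt64Ty(M.getContext());
//   Constant *Counter = M.getOrInsertGlobal("counter", I64, [&] {
//     return new GlobalVariable(M, I64, /*isConstant=*/false,
//                               GlobalValue::InternalLinkage,
//                               ConstantInt::get(I64, 0), "counter");
//   });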
/// @}
/// @name Global Alias Accessors
/// @{
/// Return the global alias in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalAlias *getNamedAlias(StringRef Name) const;
/// @}
/// @name Global IFunc Accessors
/// @{
/// Return the global ifunc in the module with the specified name, of
/// arbitrary type. This method returns null if a global with the specified
/// name is not found.
GlobalIFunc *getNamedIFunc(StringRef Name) const;
/// @}
/// @name Named Metadata Accessors
/// @{
/// Return the first NamedMDNode in the module with the specified name. This
/// method returns null if a NamedMDNode with the specified name is not found.
NamedMDNode *getNamedMetadata(const Twine &Name) const;
/// Return the named MDNode in the module with the specified name. This method
/// creates a new NamedMDNode and returns it if a NamedMDNode with the
/// specified name is not found.
NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
/// Remove the given NamedMDNode from this module and delete it.
void eraseNamedMetadata(NamedMDNode *NMD);
/// @}
/// @name Comdat Accessors
/// @{
/// Return the Comdat in the module with the specified name. It is created
/// if it didn't already exist.
Comdat *getOrInsertComdat(StringRef Name);
/// @}
/// @name Module Flags Accessors
/// @{
/// Returns the module flags in the provided vector.
void getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const;
/// Return the corresponding value if Key appears in module flags, otherwise
/// return null.
Metadata *getModuleFlag(StringRef Key) const;
/// Returns the NamedMDNode in the module that represents module-level flags.
/// This method returns null if there are no module-level flags.
NamedMDNode *getModuleFlagsMetadata() const;
/// Returns the NamedMDNode in the module that represents module-level flags.
/// If module-level flags aren't found, it creates the named metadata that
/// contains them.
NamedMDNode *getOrInsertModuleFlagsMetadata();
/// Add a module-level flag to the module-level flags metadata. It will create
/// the module-level flags named metadata if it doesn't already exist.
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val);
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Constant *Val);
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val);
void addModuleFlag(MDNode *Node);
/// Like addModuleFlag but replaces the old module flag if it already exists.
void setModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val);
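// Illustrative sketch (hypothetical keys and values; M is an assumed Module):
// record flags and read one back. Merge semantics at link time follow the
// ModFlagBehavior documented above.
//   M.addModuleFlag(Module::Warning, "Dwarf Version", 4);
//   M.addModuleFlag(Module::Error, "wchar_size", 4);
//   if (auto *CI = mdconst::extract_or_null<ConstantInt>(
//           M.getModuleFlag("wchar_size")))
//     errs() << "wchar_size = " << CI->getZExtValue() << "\n";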
/// @}
/// @name Materialization
/// @{
/// Sets the GVMaterializer to GVM. This module must not yet have a
/// Materializer. To reset the materializer for a module that already has one,
/// call materializeAll first. Destroying this module will destroy
/// its materializer without materializing any more GlobalValues. Without
/// destroying the Module, there is no way to detach or destroy a materializer
/// without materializing all the GVs it controls, to avoid leaving orphan
/// unmaterialized GVs.
void setMaterializer(GVMaterializer *GVM);
/// Retrieves the GVMaterializer, if any, for this Module.
GVMaterializer *getMaterializer() const { return Materializer.get(); }
bool isMaterialized() const { return !getMaterializer(); }
/// Make sure the GlobalValue is fully read.
llvm::Error materialize(GlobalValue *GV);
/// Make sure all GlobalValues in this Module are fully read and clear the
/// Materializer.
llvm::Error materializeAll();
llvm::Error materializeMetadata();
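// Illustrative sketch (hypothetical usage; M is an assumed lazily-loaded
// Module): make sure everything is read before the module is used.
//   if (!M.isMaterialized())
//     if (llvm::Error Err = M.materializeAll())
//       return Err; // propagate, assuming the caller returns llvm::Error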
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
/// @{
/// Get the Module's list of global variables (constant).
const GlobalListType &getGlobalList() const { return GlobalList; }
/// Get the Module's list of global variables.
GlobalListType &getGlobalList() { return GlobalList; }
static GlobalListType Module::*getSublistAccess(GlobalVariable*) {
return &Module::GlobalList;
}
/// Get the Module's list of functions (constant).
const FunctionListType &getFunctionList() const { return FunctionList; }
/// Get the Module's list of functions.
FunctionListType &getFunctionList() { return FunctionList; }
static FunctionListType Module::*getSublistAccess(Function*) {
return &Module::FunctionList;
}
/// Get the Module's list of aliases (constant).
const AliasListType &getAliasList() const { return AliasList; }
/// Get the Module's list of aliases.
AliasListType &getAliasList() { return AliasList; }
static AliasListType Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
/// Get the Module's list of ifuncs (constant).
const IFuncListType &getIFuncList() const { return IFuncList; }
/// Get the Module's list of ifuncs.
IFuncListType &getIFuncList() { return IFuncList; }
static IFuncListType Module::*getSublistAccess(GlobalIFunc*) {
return &Module::IFuncList;
}
/// Get the Module's list of named metadata (constant).
const NamedMDListType &getNamedMDList() const { return NamedMDList; }
/// Get the Module's list of named metadata.
NamedMDListType &getNamedMDList() { return NamedMDList; }
static NamedMDListType Module::*getSublistAccess(NamedMDNode*) {
return &Module::NamedMDList;
}
/// Get the Module's symbol table of global variable and function identifiers (constant).
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; }
/// Get the Module's symbol table for COMDATs (constant).
const ComdatSymTabType &getComdatSymbolTable() const { return ComdatSymTab; }
/// Get the Module's symbol table for COMDATs.
ComdatSymTabType &getComdatSymbolTable() { return ComdatSymTab; }
/// @}
/// @name Global Variable Iteration
/// @{
global_iterator global_begin() { return GlobalList.begin(); }
const_global_iterator global_begin() const { return GlobalList.begin(); }
global_iterator global_end () { return GlobalList.end(); }
const_global_iterator global_end () const { return GlobalList.end(); }
size_t global_size () const { return GlobalList.size(); }
bool global_empty() const { return GlobalList.empty(); }
iterator_range<global_iterator> globals() {
return make_range(global_begin(), global_end());
}
iterator_range<const_global_iterator> globals() const {
return make_range(global_begin(), global_end());
}
/// @}
/// @name Function Iteration
/// @{
iterator begin() { return FunctionList.begin(); }
const_iterator begin() const { return FunctionList.begin(); }
iterator end () { return FunctionList.end(); }
const_iterator end () const { return FunctionList.end(); }
reverse_iterator rbegin() { return FunctionList.rbegin(); }
const_reverse_iterator rbegin() const{ return FunctionList.rbegin(); }
reverse_iterator rend() { return FunctionList.rend(); }
const_reverse_iterator rend() const { return FunctionList.rend(); }
size_t size() const { return FunctionList.size(); }
bool empty() const { return FunctionList.empty(); }
iterator_range<iterator> functions() {
return make_range(begin(), end());
}
iterator_range<const_iterator> functions() const {
return make_range(begin(), end());
}
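// Illustrative sketch (hypothetical usage; M is an assumed Module): walk the
// module with the range accessors above.
//   unsigned NumDefinedFunctions = 0;
//   for (const Function &F : M.functions())
//     if (!F.isDeclaration())
//       ++NumDefinedFunctions;
//   for (const GlobalVariable &GV : M.globals())
//     if (GV.hasInitializer())
//       ++NumInitializedGlobals; // NumInitializedGlobals: an assumed counter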
/// @}
/// @name Alias Iteration
/// @{
alias_iterator alias_begin() { return AliasList.begin(); }
const_alias_iterator alias_begin() const { return AliasList.begin(); }
alias_iterator alias_end () { return AliasList.end(); }
const_alias_iterator alias_end () const { return AliasList.end(); }
size_t alias_size () const { return AliasList.size(); }
bool alias_empty() const { return AliasList.empty(); }
iterator_range<alias_iterator> aliases() {
return make_range(alias_begin(), alias_end());
}
iterator_range<const_alias_iterator> aliases() const {
return make_range(alias_begin(), alias_end());
}
/// @}
/// @name IFunc Iteration
/// @{
ifunc_iterator ifunc_begin() { return IFuncList.begin(); }
const_ifunc_iterator ifunc_begin() const { return IFuncList.begin(); }
ifunc_iterator ifunc_end () { return IFuncList.end(); }
const_ifunc_iterator ifunc_end () const { return IFuncList.end(); }
size_t ifunc_size () const { return IFuncList.size(); }
bool ifunc_empty() const { return IFuncList.empty(); }
iterator_range<ifunc_iterator> ifuncs() {
return make_range(ifunc_begin(), ifunc_end());
}
iterator_range<const_ifunc_iterator> ifuncs() const {
return make_range(ifunc_begin(), ifunc_end());
}
/// @}
/// @name Convenience iterators
/// @{
using global_object_iterator =
concat_iterator<GlobalObject, iterator, global_iterator>;
using const_global_object_iterator =
concat_iterator<const GlobalObject, const_iterator,
const_global_iterator>;
iterator_range<global_object_iterator> global_objects();
iterator_range<const_global_object_iterator> global_objects() const;
using global_value_iterator =
concat_iterator<GlobalValue, iterator, global_iterator, alias_iterator,
ifunc_iterator>;
using const_global_value_iterator =
concat_iterator<const GlobalValue, const_iterator, const_global_iterator,
const_alias_iterator, const_ifunc_iterator>;
iterator_range<global_value_iterator> global_values();
iterator_range<const_global_value_iterator> global_values() const;
/// @}
/// @name Named Metadata Iteration
/// @{
named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); }
const_named_metadata_iterator named_metadata_begin() const {
return NamedMDList.begin();
}
named_metadata_iterator named_metadata_end() { return NamedMDList.end(); }
const_named_metadata_iterator named_metadata_end() const {
return NamedMDList.end();
}
size_t named_metadata_size() const { return NamedMDList.size(); }
bool named_metadata_empty() const { return NamedMDList.empty(); }
iterator_range<named_metadata_iterator> named_metadata() {
return make_range(named_metadata_begin(), named_metadata_end());
}
iterator_range<const_named_metadata_iterator> named_metadata() const {
return make_range(named_metadata_begin(), named_metadata_end());
}
/// An iterator for DICompileUnits that skips those marked NoDebug.
class debug_compile_units_iterator {
NamedMDNode *CUs;
unsigned Idx;
void SkipNoDebugCUs();
public:
using iterator_category = std::input_iterator_tag;
using value_type = DICompileUnit *;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
explicit debug_compile_units_iterator(NamedMDNode *CUs, unsigned Idx)
: CUs(CUs), Idx(Idx) {
SkipNoDebugCUs();
}
debug_compile_units_iterator &operator++() {
++Idx;
SkipNoDebugCUs();
return *this;
}
debug_compile_units_iterator operator++(int) {
debug_compile_units_iterator T(*this);
++Idx;
return T;
}
bool operator==(const debug_compile_units_iterator &I) const {
return Idx == I.Idx;
}
bool operator!=(const debug_compile_units_iterator &I) const {
return Idx != I.Idx;
}
DICompileUnit *operator*() const;
DICompileUnit *operator->() const;
};
debug_compile_units_iterator debug_compile_units_begin() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return debug_compile_units_iterator(CUs, 0);
}
debug_compile_units_iterator debug_compile_units_end() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return debug_compile_units_iterator(CUs, CUs ? CUs->getNumOperands() : 0);
}
/// Return an iterator range over all DICompileUnits listed in this Module's
/// llvm.dbg.cu named metadata node that aren't explicitly marked as
/// NoDebug.
iterator_range<debug_compile_units_iterator> debug_compile_units() const {
auto *CUs = getNamedMetadata("llvm.dbg.cu");
return make_range(
debug_compile_units_iterator(CUs, 0),
debug_compile_units_iterator(CUs, CUs ? CUs->getNumOperands() : 0));
}
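// Illustrative sketch (hypothetical usage; M is an assumed Module): visit each
// compile unit that is not marked NoDebug.
//   for (DICompileUnit *CU : M.debug_compile_units())
//     errs() << CU->getFilename() << "\n";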
/// @}
/// Destroy ConstantArrays in LLVMContext if they are not used.
/// ConstantArrays constructed during linking can cause quadratic memory
/// explosion. Releasing all unused constants can cause a 20% LTO compile-time
/// slowdown for a large application.
///
/// NOTE: Constants are currently owned by LLVMContext. Therefore this can
/// only be called where all uses of the LLVMContext are understood.
void dropTriviallyDeadConstantArrays();
/// @name Utility functions for printing and dumping Module objects
/// @{
/// Print the module to an output stream with an optional
/// AssemblyAnnotationWriter. If \c ShouldPreserveUseListOrder, then include
/// uselistorder directives so that use-lists can be recreated when reading
/// the assembly.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder = false,
bool IsForDebug = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
/// This function causes all the subinstructions to "let go" of all references
/// that they are maintaining. This allows one to 'delete' a whole module at
/// a time, even though there may be circular references... first all
/// references are dropped, and all use counts go to zero. Then everything
/// is delete'd for real. Note that no operations are valid on an object
/// that has "dropped all references", except operator delete.
void dropAllReferences();
/// @}
/// @name Utility functions for querying Debug information.
/// @{
/// Returns the number of register parameters by checking
/// module flags.
unsigned getNumberRegisterParameters() const;
/// Returns the Dwarf Version by checking module flags.
unsigned getDwarfVersion() const;
/// Returns the DWARF format by checking module flags.
bool isDwarf64() const;
/// Returns the CodeView Version by checking module flags.
/// Returns zero if not present in module.
unsigned getCodeViewFlag() const;
/// @}
/// @name Utility functions for querying and setting PIC level
/// @{
/// Returns the PIC level (small or large model)
PICLevel::Level getPICLevel() const;
/// Set the PIC level (small or large model)
void setPICLevel(PICLevel::Level PL);
/// @}
/// @name Utility functions for querying and setting PIE level
/// @{
/// Returns the PIE level (small or large model)
PIELevel::Level getPIELevel() const;
/// Set the PIE level (small or large model)
void setPIELevel(PIELevel::Level PL);
/// @}
/// @name Utility functions for querying and setting code model
/// @{
/// Returns the code model (tiny, small, kernel, medium or large model)
Optional<CodeModel::Model> getCodeModel() const;
/// Set the code model (tiny, small, kernel, medium or large)
void setCodeModel(CodeModel::Model CL);
/// @}
/// @name Utility functions for querying and setting PGO summary
/// @{
/// Attach profile summary metadata to this module.
void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind);
/// Returns profile summary metadata. When IsCS is true, use the context
/// sensitive profile summary.
Metadata *getProfileSummary(bool IsCS) const;
/// @}
/// Returns whether semantic interposition is to be respected.
bool getSemanticInterposition() const;
/// Set whether semantic interposition is to be respected.
void setSemanticInterposition(bool);
/// Returns true if PLT should be avoided for RTLib calls.
bool getRtLibUseGOT() const;
/// Set that the PLT should be avoided for RTLib calls.
void setRtLibUseGOT();
/// Get/set whether synthesized functions should get the uwtable attribute.
bool getUwtable() const;
void setUwtable();
/// Get/set whether synthesized functions should get the "frame-pointer"
/// attribute.
FramePointerKind getFramePointer() const;
void setFramePointer(FramePointerKind Kind);
/// Get/set what kind of stack protector guard to use.
StringRef getStackProtectorGuard() const;
void setStackProtectorGuard(StringRef Kind);
/// Get/set which register to use as the stack protector guard register. The
/// empty string is equivalent to "global". Other values may be "tls" or
/// "sysreg".
StringRef getStackProtectorGuardReg() const;
void setStackProtectorGuardReg(StringRef Reg);
/// Get/set what offset from the stack protector to use.
int getStackProtectorGuardOffset() const;
void setStackProtectorGuardOffset(int Offset);
/// Get/set the stack alignment overridden from the default.
unsigned getOverrideStackAlignment() const;
void setOverrideStackAlignment(unsigned Align);
/// @name Utility functions for querying and setting the build SDK version
/// @{
/// Attach a build SDK version metadata to this module.
void setSDKVersion(const VersionTuple &V);
/// Get the build SDK version metadata.
///
/// An empty version is returned if no such metadata is attached.
VersionTuple getSDKVersion() const;
/// @}
/// Take ownership of the given memory buffer.
void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
/// Set the partial sample profile ratio in the profile summary module flag,
/// if applicable.
void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index);
};
/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect the
/// initializer elements of that global in a SmallVector and return the global
/// itself.
GlobalVariable *collectUsedGlobalVariables(const Module &M,
SmallVectorImpl<GlobalValue *> &Vec,
bool CompilerUsed);
/// A raw_ostream inserter for modules.
inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
M.print(O, nullptr);
return O;
}
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef)
/* LLVMModuleProviderRef exists for historical reasons, but now just holds a
* Module.
*/
inline Module *unwrap(LLVMModuleProviderRef MP) {
return reinterpret_cast<Module*>(MP);
}
} // end namespace llvm
#endif // LLVM_IR_MODULE_H
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index f22d834b5e57..2d980e6935b3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1,14093 +1,14093 @@
//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution analysis
// engine, which is used primarily to analyze expressions involving induction
// variables in loops.
//
// There are several aspects to this library. First is the representation of
// scalar expressions, which are represented as subclasses of the SCEV class.
// These classes are used to represent certain types of subexpressions that we
// can handle. We only create one SCEV of a particular shape, so
// pointer-comparisons for equality are legal.
//
// One important aspect of the SCEV objects is that they are never cyclic, even
// if there is a cycle in the dataflow for an expression (ie, a PHI node). If
// the PHI node is one of the idioms that we can represent (e.g., a polynomial
// recurrence) then we represent it directly as a recurrence node, otherwise we
// represent it as a SCEVUnknown node.
//
// In addition to being able to represent expressions of various types, we also
// have folders that are used to build the *canonical* representation for a
// particular expression. These folders are capable of using a variety of
// rewrite rules to simplify the expressions.
//
// Once the folders are defined, we can implement the more interesting
// higher-level code, such as the code that recognizes PHI nodes of various
// types, computes the execution count of a loop, etc.
//
// TODO: We should use these routines and value representations to implement
// dependence analysis!
//
//===----------------------------------------------------------------------===//
//
// There are several good references for the techniques used in this analysis.
//
// Chains of recurrences -- a method to expedite the evaluation
// of closed-form functions
// Olaf Bachmann, Paul S. Wang, Eugene V. Zima
//
// On computational properties of chains of recurrences
// Eugene V. Zima
//
// Symbolic Evaluation of Chains of Recurrences for Loop Optimization
// Robert A. van Engelen
//
// Efficient Symbolic Analysis for Optimizing Compilers
// Robert A. van Engelen
//
// Using the chains of recurrences algebra for data dependence testing and
// induction variable substitution
// MS Thesis, Johnie Birch
//
//===----------------------------------------------------------------------===//
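// Illustrative note (hypothetical usage; SE and Int64Ty are an assumed
// ScalarEvolution& and IntegerType*): because only one SCEV node is created
// per distinct shape, clients can compare expressions by pointer:
//   const SCEV *A = SE.getConstant(Int64Ty, 42);
//   const SCEV *B = SE.getConstant(Int64Ty, 42);
//   bool Same = (A == B); // true: both requests return the same uniqued node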
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionDivision.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
STATISTIC(NumArrayLenItCounts,
"Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
"Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
"Number of loops without predictable loop counts");
STATISTIC(NumBruteForceTripCountsComputed,
"Number of loops with trip counts computed by force");
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
cl::ZeroOrMore,
cl::desc("Maximum number of iterations SCEV will "
"symbolically execute a constant "
"derived loop"),
cl::init(100));
// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean.
static cl::opt<bool> VerifySCEV(
"verify-scev", cl::Hidden,
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
static cl::opt<bool> VerifySCEVStrict(
"verify-scev-strict", cl::Hidden,
cl::desc("Enable stricter verification with -verify-scev is passed"));
static cl::opt<bool>
VerifySCEVMap("verify-scev-maps", cl::Hidden,
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
static cl::opt<bool> VerifyIR(
"scev-verify-ir", cl::Hidden,
cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
cl::init(false));
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
cl::init(32));
static cl::opt<unsigned> AddOpsInlineThreshold(
"scev-addops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining addition operands into a SCEV"),
cl::init(500));
static cl::opt<unsigned> MaxSCEVCompareDepth(
"scalar-evolution-max-scev-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
cl::init(32));
static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
"scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
cl::init(2));
static cl::opt<unsigned> MaxValueCompareDepth(
"scalar-evolution-max-value-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive value complexity comparisons"),
cl::init(2));
static cl::opt<unsigned>
MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden,
cl::desc("Maximum depth of recursive arithmetics"),
cl::init(32));
static cl::opt<unsigned> MaxConstantEvolvingDepth(
"scalar-evolution-max-constant-evolving-depth", cl::Hidden,
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
static cl::opt<unsigned>
MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
cl::init(8));
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(8));
static cl::opt<unsigned>
HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
cl::desc("Size of the expression which is considered huge"),
cl::init(4096));
static cl::opt<bool>
ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
cl::desc("When printing analysis, include information on every instruction"));
static cl::opt<bool> UseExpensiveRangeSharpening(
"scalar-evolution-use-expensive-range-sharpening", cl::Hidden,
cl::init(false),
cl::desc("Use more powerful methods of sharpening expression ranges. May "
"be costly in terms of compile time"));
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
#endif
void SCEV::print(raw_ostream &OS) const {
switch (getSCEVType()) {
case scConstant:
cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
return;
case scPtrToInt: {
const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this);
const SCEV *Op = PtrToInt->getOperand();
OS << "(ptrtoint " << *Op->getType() << " " << *Op << " to "
<< *PtrToInt->getType() << ")";
return;
}
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
const SCEV *Op = Trunc->getOperand();
OS << "(trunc " << *Op->getType() << " " << *Op << " to "
<< *Trunc->getType() << ")";
return;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
const SCEV *Op = ZExt->getOperand();
OS << "(zext " << *Op->getType() << " " << *Op << " to "
<< *ZExt->getType() << ")";
return;
}
case scSignExtend: {
const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
const SCEV *Op = SExt->getOperand();
OS << "(sext " << *Op->getType() << " " << *Op << " to "
<< *SExt->getType() << ")";
return;
}
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
OS << "{" << *AR->getOperand(0);
for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
OS << ",+," << *AR->getOperand(i);
OS << "}<";
if (AR->hasNoUnsignedWrap())
OS << "nuw><";
if (AR->hasNoSignedWrap())
OS << "nsw><";
if (AR->hasNoSelfWrap() &&
!AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
OS << "nw><";
AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ">";
return;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
case scAddExpr: OpStr = " + "; break;
case scMulExpr: OpStr = " * "; break;
case scUMaxExpr: OpStr = " umax "; break;
case scSMaxExpr: OpStr = " smax "; break;
case scUMinExpr:
OpStr = " umin ";
break;
case scSMinExpr:
OpStr = " smin ";
break;
default:
llvm_unreachable("There are no other nary expression types.");
}
OS << "(";
ListSeparator LS(OpStr);
for (const SCEV *Op : NAry->operands())
OS << LS << *Op;
OS << ")";
switch (NAry->getSCEVType()) {
case scAddExpr:
case scMulExpr:
if (NAry->hasNoUnsignedWrap())
OS << "<nuw>";
if (NAry->hasNoSignedWrap())
OS << "<nsw>";
break;
default:
// Nothing to print for other nary expressions.
break;
}
return;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
return;
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(this);
Type *AllocTy;
if (U->isSizeOf(AllocTy)) {
OS << "sizeof(" << *AllocTy << ")";
return;
}
if (U->isAlignOf(AllocTy)) {
OS << "alignof(" << *AllocTy << ")";
return;
}
Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo)) {
OS << "offsetof(" << *CTy << ", ";
FieldNo->printAsOperand(OS, false);
OS << ")";
return;
}
// Otherwise just print it normally.
U->getValue()->printAsOperand(OS, false);
return;
}
case scCouldNotCompute:
OS << "***COULDNOTCOMPUTE***";
return;
}
llvm_unreachable("Unknown SCEV kind!");
}
Type *SCEV::getType() const {
switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return cast<SCEVCastExpr>(this)->getType();
case scAddRecExpr:
return cast<SCEVAddRecExpr>(this)->getType();
case scMulExpr:
return cast<SCEVMulExpr>(this)->getType();
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
return cast<SCEVMinMaxExpr>(this)->getType();
case scAddExpr:
return cast<SCEVAddExpr>(this)->getType();
case scUDivExpr:
return cast<SCEVUDivExpr>(this)->getType();
case scUnknown:
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
return false;
}
bool SCEV::isOne() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isOne();
return false;
}
bool SCEV::isAllOnesValue() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isMinusOne();
return false;
}
bool SCEV::isNonConstantNegative() const {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
if (!Mul) return false;
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getAPInt().isNegative();
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
return getConstant(ConstantInt::get(getContext(), Val));
}
const SCEV *
ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy, computeExpressionSize(op)), Ty(ty) {
Operands[0] = op;
}
SCEVPtrToIntExpr::SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op,
Type *ITy)
: SCEVCastExpr(ID, scPtrToInt, Op, ITy) {
assert(getOperand()->getType()->isPointerTy() && Ty->isIntegerTy() &&
"Must be a non-bit-width-changing pointer-to-integer cast!");
}
SCEVIntegralCastExpr::SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID,
SCEVTypes SCEVTy, const SCEV *op,
Type *ty)
: SCEVCastExpr(ID, SCEVTy, op, ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op,
Type *ty)
: SCEVIntegralCastExpr(ID, scTruncate, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate non-integer value!");
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVIntegralCastExpr(ID, scZeroExtend, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot zero extend non-integer value!");
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
: SCEVIntegralCastExpr(ID, scSignExtend, op, ty) {
assert(getOperand()->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot sign extend non-integer value!");
}
void SCEVUnknown::deleted() {
// Clear this SCEVUnknown from various maps.
SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Release the value.
setValPtr(nullptr);
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
// Update this SCEVUnknown to point to the new value. This is needed
// because there may still be outstanding SCEVs which still point to
// this SCEVUnknown.
setValPtr(New);
}
bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue() &&
CE->getNumOperands() == 2)
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
if (CI->isOne()) {
AllocTy = cast<GEPOperator>(CE)->getSourceElementType();
return true;
}
return false;
}
bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked() &&
CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
if (CI->isOne() &&
STy->getNumElements() == 2 &&
STy->getElementType(0)->isIntegerTy(1)) {
AllocTy = STy->getElementType(1);
return true;
}
}
}
return false;
}
bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getNumOperands() == 3 &&
CE->getOperand(0)->isNullValue() &&
CE->getOperand(1)->isNullValue()) {
Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
if (Ty->isStructTy() || Ty->isArrayTy()) {
CTy = Ty;
FieldNo = CE->getOperand(2);
return true;
}
}
return false;
}
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
/// Compare the two values \p LV and \p RV in terms of their "complexity" where
/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
/// have been previously deemed to be "equally complex" by this routine. It is
/// intended to avoid exponential time complexity in cases like:
///
/// %a = f(%x, %y)
/// %b = f(%a, %a)
/// %c = f(%b, %b)
///
/// %d = f(%x, %y)
/// %e = f(%d, %d)
/// %f = f(%e, %e)
///
/// CompareValueComplexity(%f, %c)
///
/// Since we do not continue running this routine on expression trees once we
/// have seen unequal values, there is no need to track them in the cache.
static int
CompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, Value *LV, Value *RV,
unsigned Depth) {
if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV))
return 0;
// Order pointer values after integer values. This helps SCEVExpander form
// GEPs.
bool LIsPointer = LV->getType()->isPointerTy(),
RIsPointer = RV->getType()->isPointerTy();
if (LIsPointer != RIsPointer)
return (int)LIsPointer - (int)RIsPointer;
// Compare getValueID values.
unsigned LID = LV->getValueID(), RID = RV->getValueID();
if (LID != RID)
return (int)LID - (int)RID;
// Sort arguments by their position.
if (const auto *LA = dyn_cast<Argument>(LV)) {
const auto *RA = cast<Argument>(RV);
unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
return (int)LArgNo - (int)RArgNo;
}
if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
const auto *RGV = cast<GlobalValue>(RV);
const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
auto LT = GV->getLinkage();
return !(GlobalValue::isPrivateLinkage(LT) ||
GlobalValue::isInternalLinkage(LT));
};
// Use the names to distinguish the two values, but only if the
// names are semantically important.
if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
return LGV->getName().compare(RGV->getName());
}
// For instructions, compare their loop depth, and their operand count. This
// is pretty loose.
if (const auto *LInst = dyn_cast<Instruction>(LV)) {
const auto *RInst = cast<Instruction>(RV);
// Compare loop depths.
const BasicBlock *LParent = LInst->getParent(),
*RParent = RInst->getParent();
if (LParent != RParent) {
unsigned LDepth = LI->getLoopDepth(LParent),
RDepth = LI->getLoopDepth(RParent);
if (LDepth != RDepth)
return (int)LDepth - (int)RDepth;
}
// Compare the number of operands.
unsigned LNumOps = LInst->getNumOperands(),
RNumOps = RInst->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
for (unsigned Idx : seq(0u, LNumOps)) {
int Result =
CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx),
RInst->getOperand(Idx), Depth + 1);
if (Result != 0)
return Result;
}
}
EqCacheValue.unionSets(LV, RV);
return 0;
}
// Return negative, zero, or positive, if LHS is less than, equal to, or greater
// than RHS, respectively. A three-way result allows recursive comparisons to be
// more efficient.
// If the max analysis depth was reached, return None to indicate that we do
// not know whether they are equivalent.
static Optional<int>
CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
EquivalenceClasses<const Value *> &EqCacheValue,
const LoopInfo *const LI, const SCEV *LHS,
const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
return 0;
// Primarily, sort the SCEVs by their getSCEVType().
SCEVTypes LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
if (LType != RType)
return (int)LType - (int)RType;
if (EqCacheSCEV.isEquivalent(LHS, RHS))
return 0;
if (Depth > MaxSCEVCompareDepth)
return None;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
switch (LType) {
case scUnknown: {
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(),
RU->getValue(), Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scConstant: {
const SCEVConstant *LC = cast<SCEVConstant>(LHS);
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
// Compare constant values.
const APInt &LA = LC->getAPInt();
const APInt &RA = RC->getAPInt();
unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
if (LBitWidth != RBitWidth)
return (int)LBitWidth - (int)RBitWidth;
return LA.ult(RA) ? -1 : 1;
}
case scAddRecExpr: {
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
// There is always a dominance relationship between two recs that are used by
// one SCEV, so we can safely sort recs by loop header dominance. We require
// such an order in getAddExpr.
const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
if (LLoop != RLoop) {
const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader();
assert(LHead != RHead && "Two loops share the same header?");
if (DT.dominates(LHead, RHead))
return 1;
else
assert(DT.dominates(RHead, LHead) &&
"No dominance between recurrences used by one SCEV?");
return -1;
}
// Addrec complexity grows with operand count.
unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LA->getOperand(i), RA->getOperand(i), DT,
Depth + 1);
if (X != 0)
return X;
}
EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr: {
const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
// Lexicographically compare n-ary expressions.
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
for (unsigned i = 0; i != LNumOps; ++i) {
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LC->getOperand(i), RC->getOperand(i), DT,
Depth + 1);
if (X != 0)
return X;
}
EqCacheSCEV.unionSets(LHS, RHS);
return 0;
}
case scUDivExpr: {
const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
// Lexicographically compare udiv expressions.
auto X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(),
RC->getLHS(), DT, Depth + 1);
if (X != 0)
return X;
X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(),
RC->getRHS(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend: {
const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
// Compare cast expressions by operand.
auto X =
CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(),
RC->getOperand(), DT, Depth + 1);
if (X == 0)
EqCacheSCEV.unionSets(LHS, RHS);
return X;
}
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
/// finished, we know that any duplicates in the vector are consecutive and that
/// complexity is monotonically increasing.
///
/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI, DominatorTree &DT) {
if (Ops.size() < 2) return; // Noop
EquivalenceClasses<const SCEV *> EqCacheSCEV;
EquivalenceClasses<const Value *> EqCacheValue;
// Whether LHS has provably less complexity than RHS.
auto IsLessComplex = [&](const SCEV *LHS, const SCEV *RHS) {
auto Complexity =
CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT);
return Complexity && *Complexity < 0;
};
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
if (IsLessComplex(RHS, LHS))
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
return IsLessComplex(LHS, RHS);
});
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
// be extremely short in practice. Note that we take this approach because we
// do not want to depend on the addresses of the objects we are grouping.
for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
const SCEV *S = Ops[i];
unsigned Complexity = S->getSCEVType();
// If there are any objects of the same complexity and same value as this
// one, group them.
for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
if (Ops[j] == S) { // Found a duplicate.
// Move it to immediately after i'th element.
std::swap(Ops[i+1], Ops[j]);
++i; // no need to rescan it.
if (i == e-2) return; // Done!
}
}
}
}
/// Returns true if \p Ops contains a huge SCEV (an expression whose subtree
/// contains at least HugeExprThreshold nodes).
static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
return any_of(Ops, [](const SCEV *S) {
return S->getExpressionSize() >= HugeExprThreshold;
});
}
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
//===----------------------------------------------------------------------===//
/// Compute BC(It, K). The result has width W. Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
ScalarEvolution &SE,
Type *ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
// We are using the following formula for BC(It, K):
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
//
// Suppose, W is the bitwidth of the return value. We must be prepared for
// overflow. Hence, we must ensure that the result of our computation is
// equal to the accurate one modulo 2^W. Unfortunately, division isn't
// safe in modular arithmetic.
//
// However, this code doesn't use exactly that formula; the formula it uses
// is something like the following, where T is the number of factors of 2 in
// K! (i.e. trailing zeros in the binary representation of K!), and ^ is
// exponentiation:
//
// BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
//
// This formula is trivially equivalent to the previous formula. However,
// this formula can be implemented much more efficiently. The trick is that
// K! / 2^T is odd, and exact division by an odd number *is* safe in modular
// arithmetic. To do exact division in modular arithmetic, all we have
// to do is multiply by the inverse. Therefore, this step can be done at
// width W.
//
// The next issue is how to safely do the division by 2^T. The way this
// is done is by doing the multiplication step at a width of at least W + T
// bits. This way, the bottom W+T bits of the product are accurate. Then,
// when we perform the division by 2^T (which is equivalent to a right shift
// by T), the bottom W bits are accurate. Extra bits are okay; they'll get
// truncated out after the division by 2^T.
//
// In comparison to just directly using the first formula, this technique
// is much more efficient; using the first formula requires W * K bits,
// but this formula requires less than W + K bits. Also, the first formula requires
// a division step, whereas this formula only requires multiplies and shifts.
//
// It doesn't matter whether the subtraction step is done in the calculation
// width or the input iteration count's width; if the subtraction overflows,
// the result must be zero anyway. We prefer here to do it in the width of
// the induction variable because it helps a lot for certain cases; CodeGen
// isn't smart enough to ignore the overflow, which leads to much less
// efficient code if the width of the subtraction is wider than the native
// register width.
//
// (It's possible to not widen at all by pulling out factors of 2 before
// the multiplication; for example, K=2 can be calculated as
// It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
// extra arithmetic, so it's not an obvious win, and it gets
// much more complicated for K > 3.)
// Protection from insane SCEVs; this bound is conservative,
// but it probably doesn't matter.
if (K > 1000)
return SE.getCouldNotCompute();
unsigned W = SE.getTypeSizeInBits(ResultTy);
// Calculate K! / 2^T and T; we divide out the factors of two before
// multiplying for calculating K! / 2^T to avoid overflow.
// Other overflow doesn't matter because we only care about the bottom
// W bits of the result.
APInt OddFactorial(W, 1);
unsigned T = 1;
for (unsigned i = 3; i <= K; ++i) {
APInt Mult(W, i);
unsigned TwoFactors = Mult.countTrailingZeros();
T += TwoFactors;
Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
}
// We need at least W + T bits for the multiplication step
unsigned CalculationBits = W + T;
// Calculate 2^T, at width T+W.
APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
// Calculate the multiplicative inverse of K! / 2^T;
// this multiplication factor will perform the exact division by
// K! / 2^T.
APInt Mod = APInt::getSignedMinValue(W+1);
APInt MultiplyFactor = OddFactorial.zext(W+1);
MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
Dividend = SE.getMulExpr(Dividend,
SE.getTruncateOrZeroExtend(S, CalculationTy));
}
// Divide by 2^T
const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
// Truncate the result, and divide by K! / 2^T.
return SE.getMulExpr(SE.getConstant(MultiplyFactor),
SE.getTruncateOrZeroExtend(DivResult, ResultTy));
}
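// Worked example (illustrative, not part of the original source) for K = 3 and
// a result width of W = 8:
//   K! = 6, so T = 1 and OddFactorial = K!/2^T = 3.
//   The multiplicative inverse of 3 modulo 2^8 is 171 (3 * 171 = 513 = 1 mod 256).
//   The product It*(It-1)*(It-2) is formed at W + T = 9 bits, udiv'd by
//   2^T = 2, truncated back to 8 bits, and multiplied by 171.
//   For It = 5: 5*4*3 = 60, 60 / 2 = 30, 30 * 171 = 5130 = 10 (mod 256),
//   which matches BC(5, 3) = C(5, 3) = 10.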
/// Return the value of this chain of recurrences at the specified iteration
/// number. We can evaluate this recurrence by multiplying each element in the
/// chain by the binomial coefficient corresponding to it. In other words, we
/// can evaluate {A,+,B,+,C,+,D} as:
///
/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
///
/// where BC(It, k) stands for binomial coefficient.
const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
ScalarEvolution &SE) const {
return evaluateAtIteration(makeArrayRef(op_begin(), op_end()), It, SE);
}
const SCEV *
SCEVAddRecExpr::evaluateAtIteration(ArrayRef<const SCEV *> Operands,
const SCEV *It, ScalarEvolution &SE) {
assert(Operands.size() > 0);
const SCEV *Result = Operands[0];
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
// The computation is correct in the face of overflow provided that the
// multiplication is performed _after_ the evaluation of the binomial
// coefficient.
const SCEV *Coeff = BinomialCoefficient(It, i, SE, Result->getType());
if (isa<SCEVCouldNotCompute>(Coeff))
return Coeff;
Result = SE.getAddExpr(Result, SE.getMulExpr(Operands[i], Coeff));
}
return Result;
}
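// Illustrative sketch (editorial, not part of the original source): for the
// quadratic recurrence {0,+,1,+,1} (the triangular numbers 0, 1, 3, 6, 10, ...),
// the loop above computes
//   0*BC(It,0) + 1*BC(It,1) + 1*BC(It,2) = It + It*(It-1)/2,
// so evaluating at It = 4 yields 4 + 4*3/2 = 10, the value of the recurrence
// after four backedge iterations.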
//===----------------------------------------------------------------------===//
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
unsigned Depth) {
assert(Depth <= 1 &&
"getLosslessPtrToIntExpr() should self-recurse at most once.");
// We could be called with an integer-typed operand during SCEV rewrites.
// Since the operand is an integer already, just perform zext/trunc/self cast.
if (!Op->getType()->isPointerTy())
return Op;
// What would be an ID for such a SCEV cast expression?
FoldingSetNodeID ID;
ID.AddInteger(scPtrToInt);
ID.AddPointer(Op);
void *IP = nullptr;
// Is there already an expression for such a cast?
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
// It isn't legal for optimizations to construct new ptrtoint expressions
// for non-integral pointers.
if (getDataLayout().isNonIntegralPointerType(Op->getType()))
return getCouldNotCompute();
Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
// We can only trivially model ptrtoint if SCEV's effective (integer) type
// is sufficiently wide to represent all possible pointer values.
// We could theoretically teach SCEV to truncate wider pointers, but
// that isn't implemented for now.
if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) !=
getDataLayout().getTypeSizeInBits(IntPtrTy))
return getCouldNotCompute();
// If not, is this expression something we can't reduce any further?
if (auto *U = dyn_cast<SCEVUnknown>(Op)) {
// Perform some basic constant folding. If the operand of the ptr2int cast
// is a null pointer, don't create a ptr2int SCEV expression (that will be
// left as-is), but produce a zero constant.
// NOTE: We could handle a more general case, but lack motivational cases.
if (isa<ConstantPointerNull>(U->getValue()))
return getZero(IntPtrTy);
// Create an explicit cast node.
// We can reuse the existing insert position since if we get here,
// we won't have made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator)
SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
assert(Depth == 0 && "getLosslessPtrToIntExpr() should not self-recurse for "
"non-SCEVUnknown's.");
// Otherwise, we've got some expression that is more complex than just a
// single SCEVUnknown. But we don't want to have a SCEVPtrToIntExpr of an
// arbitrary expression; we only want a SCEVPtrToIntExpr of a SCEVUnknown,
// and the rest of the expression must be integer-typed.
// So sink the cast down to the SCEVUnknown's.
/// The SCEVPtrToIntSinkingRewriter takes a scalar evolution expression,
/// which computes a pointer-typed value, and rewrites the whole expression
/// tree so that *all* the computations are done on integers, and the only
/// pointer-typed operands in the expression are SCEVUnknown.
class SCEVPtrToIntSinkingRewriter
: public SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter> {
using Base = SCEVRewriteVisitor<SCEVPtrToIntSinkingRewriter>;
public:
SCEVPtrToIntSinkingRewriter(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {}
static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE) {
SCEVPtrToIntSinkingRewriter Rewriter(SE);
return Rewriter.visit(Scev);
}
const SCEV *visit(const SCEV *S) {
Type *STy = S->getType();
// If the expression is not pointer-typed, just keep it as-is.
if (!STy->isPointerTy())
return S;
// Else, recursively sink the cast down into it.
return Base::visit(S);
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
return !Changed ? Expr : SE.getAddExpr(Operands, Expr->getNoWrapFlags());
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
for (auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
return !Changed ? Expr : SE.getMulExpr(Operands, Expr->getNoWrapFlags());
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
assert(Expr->getType()->isPointerTy() &&
"Should only reach pointer-typed SCEVUnknown's.");
return SE.getLosslessPtrToIntExpr(Expr, /*Depth=*/1);
}
};
// And actually perform the cast sinking.
const SCEV *IntOp = SCEVPtrToIntSinkingRewriter::rewrite(Op, *this);
assert(IntOp->getType()->isIntegerTy() &&
"We must have succeeded in sinking the cast, "
"and ending up with an integer-typed expression!");
return IntOp;
}
const SCEV *ScalarEvolution::getPtrToIntExpr(const SCEV *Op, Type *Ty) {
assert(Ty->isIntegerTy() && "Target type must be an integer type!");
const SCEV *IntOp = getLosslessPtrToIntExpr(Op);
if (isa<SCEVCouldNotCompute>(IntOp))
return IntOp;
return getTruncateOrZeroExtend(IntOp, Ty);
}
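// Usage sketch (illustrative, not part of the original source; "PtrVal" is a
// hypothetical pointer-typed llvm::Value already covered by this analysis):
//
//   const SCEV *P = SE.getSCEV(PtrVal);               // pointer-typed SCEV
//   Type *I64 = Type::getInt64Ty(SE.getContext());
//   const SCEV *PAsInt = SE.getPtrToIntExpr(P, I64);
//   if (!isa<SCEVCouldNotCompute>(PAsInt)) {
//     // All arithmetic is now expressed on integers; the only remaining
//     // pointer-typed nodes are SCEVUnknown bases under scPtrToInt casts.
//   }
//
// getPtrToIntExpr() gives up (returns SCEVCouldNotCompute) for non-integral
// pointer types, or when the pointer's width does not match SCEV's effective
// integer type for it.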
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't truncate pointer!");
Ty = getEffectiveSCEVType(Ty);
FoldingSetNodeID ID;
ID.AddInteger(scTruncate);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
if (Depth > MaxCastDepth) {
SCEV *S =
new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
// if after transforming we have at most one truncate, not counting truncates
// that replace other casts.
if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) {
auto *CommOp = cast<SCEVCommutativeExpr>(Op);
SmallVector<const SCEV *, 4> Operands;
unsigned numTruncs = 0;
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
if (!isa<SCEVIntegralCastExpr>(CommOp->getOperand(i)) &&
isa<SCEVTruncateExpr>(S))
numTruncs++;
Operands.push_back(S);
}
if (numTruncs < 2) {
if (isa<SCEVAddExpr>(Op))
return getAddExpr(Operands);
else if (isa<SCEVMulExpr>(Op))
return getMulExpr(Operands);
else
llvm_unreachable("Unexpected SCEV type for Op.");
}
// Although we checked at the beginning that ID is not in the cache, the
// recursion above (or some other modification) may have inserted ID into the
// cache in the meantime. So if we find it now, just return it.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())
Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
// Return zero if truncating to known zeros.
uint32_t MinTrailingZeros = GetMinTrailingZeros(Op);
if (MinTrailingZeros >= getTypeSizeInBits(Ty))
return getZero(Ty);
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
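// Illustrative sketch (editorial, not part of the original source): truncating
// (zext i8 %x to i32) to i16 hits the trunc(zext(x)) rule above and folds to
// (zext i8 %x to i16) instead of stacking a truncate on top of an extend.
// Likewise, truncating (4 * %y) to i2 folds to the constant 0, since the two
// low bits of any multiple of 4 are zero.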
// Get the limit of a recurrence such that incrementing by Step cannot cause
// signed overflow as long as the value of the recurrence within the
// loop does not exceed this limit before incrementing.
static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
if (SE->isKnownPositive(Step)) {
*Pred = ICmpInst::ICMP_SLT;
return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
SE->getSignedRangeMax(Step));
}
if (SE->isKnownNegative(Step)) {
*Pred = ICmpInst::ICMP_SGT;
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
SE->getSignedRangeMin(Step));
}
return nullptr;
}
// Get the limit of a recurrence such that incrementing by Step cannot cause
// unsigned overflow as long as the value of the recurrence within the loop does
// not exceed this limit before incrementing.
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
*Pred = ICmpInst::ICMP_ULT;
return SE->getConstant(APInt::getMinValue(BitWidth) -
SE->getUnsignedRangeMax(Step));
}
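// Worked example (illustrative, not part of the original source): for an i8
// recurrence whose step has an unsigned-range maximum of 3, the limit above is
// 0 - 3 == 253 (mod 256) with predicate ULT: as long as the current value of
// the recurrence is ULT 253 (i.e. at most 252), adding a step of at most 3
// stays within [0, 255] and cannot wrap.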
namespace {
struct ExtendOpTraitsBase {
typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
unsigned);
};
// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
// Members present:
//
// static const SCEV::NoWrapFlags WrapType;
//
// static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
//
// static const SCEV *getOverflowLimitForStep(const SCEV *Step,
// ICmpInst::Predicate *Pred,
// ScalarEvolution *SE);
};
template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
static const GetExtendExprTy GetExtendExpr;
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
return getSignedOverflowLimitForStep(Step, Pred, SE);
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
static const GetExtendExprTy GetExtendExpr;
static const SCEV *getOverflowLimitForStep(const SCEV *Step,
ICmpInst::Predicate *Pred,
ScalarEvolution *SE) {
return getUnsignedOverflowLimitForStep(Step, Pred, SE);
}
};
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
} // end anonymous namespace
// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
// Start),+,Step} => {Step + sext/zext(Start),+,Step}. As a result, the
// expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)"
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE, unsigned Depth) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
const Loop *L = AR->getLoop();
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*SE);
// Check for a simple looking step prior to loop entry.
const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
if (!SA)
return nullptr;
// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
// subtraction is expensive. For this purpose, perform a quick and dirty
// difference, by checking for Step in the operand list.
SmallVector<const SCEV *, 4> DiffOps;
for (const SCEV *Op : SA->operands())
if (Op != Step)
DiffOps.push_back(Op);
if (DiffOps.size() == SA->getNumOperands())
return nullptr;
// Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
// `Step`:
// 1. NSW/NUW flags on the step increment.
auto PreStartFlags =
ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
// "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
// "S+X does not sign/unsign-overflow".
//
const SCEV *BECount = SE->getBackedgeTakenCount(L);
if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
!isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
return PreStart;
// 2. Direct overflow check on the step operation's expression.
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
const SCEV *OperandExtendedStart =
SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
(SE->*GetExtendExpr)(Step, WideTy, Depth));
if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
if (PreAR && AR->getNoWrapFlags(WrapType)) {
// If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
// or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
// `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
SE->setNoWrapFlags(const_cast<SCEVAddRecExpr *>(PreAR), WrapType);
}
return PreStart;
}
// 3. Loop precondition.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
if (OverflowLimit &&
SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
return PreStart;
return nullptr;
}
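// Illustrative sketch (editorial, not part of the original source): for
// AR = {2 + %n,+,2}, the code above forms PreStart = %n by dropping the Step
// operand from the start expression (SCEVs are uniqued, so the pointer
// comparison against Step suffices). If {%n,+,2} is already known <nuw> and
// the backedge is known to be taken at least once (rule 1), PreStart is
// returned, and getExtendAddRecStart() below can rebuild the extended start
// as 2 + zext(%n) instead of zext(2 + %n).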
// Get the normalized zero or sign extended expression for this AddRec's Start.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
ScalarEvolution *SE,
unsigned Depth) {
auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
if (!PreStart)
return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);
return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
Depth),
(SE->*GetExtendExpr)(PreStart, Ty, Depth));
}
// Try to prove away overflow by looking at "nearby" add recurrences. A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
// {S,+,X} == {S-T,+,X} + T
// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
// If ({S-T,+,X} + T) does not overflow ... (1)
//
// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
// If {S-T,+,X} does not overflow ... (2)
//
// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
// == {Ext(S-T)+Ext(T),+,Ext(X)}
//
// If (S-T)+T does not overflow ... (3)
//
// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
// == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration. Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
const SCEV *Step,
const Loop *L) {
auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
// We restrict `Start` to a constant to prevent SCEV from spending too much
// time here. It is correct (but more expensive) to continue with a
// non-constant `Start` and do a general SCEV subtraction to compute
// `PreStart` below.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
if (!StartC)
return false;
APInt StartAI = StartC->getAPInt();
for (unsigned Delta : {-2, -1, 1, 2}) {
const SCEV *PreStart = getConstant(StartAI - Delta);
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
ID.AddPointer(PreStart);
ID.AddPointer(Step);
ID.AddPointer(L);
void *IP = nullptr;
const auto *PreAR =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
// Give up if we don't already have the add recurrence we need because
// actually constructing an add recurrence is relatively expensive.
if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
DeltaS, &Pred, this);
if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
return true;
}
}
return false;
}
// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
const SCEVConstant *ConstantTerm,
const SCEVAddExpr *WholeAddExpr) {
const APInt &C = ConstantTerm->getAPInt();
const unsigned BitWidth = C.getBitWidth();
// Find number of trailing zeros of (x + y + ...) w/o the C first:
uint32_t TZ = BitWidth;
for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
if (TZ) {
// Set D to be as many least significant bits of C as possible while still
// guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C;
}
return APInt(BitWidth, 0);
}
// Finds an integer D for an affine AddRec expression {C,+,x} such that the top
// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the
// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p
// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count.
static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
const APInt &ConstantStart,
const SCEV *Step) {
const unsigned BitWidth = ConstantStart.getBitWidth();
const uint32_t TZ = SE.GetMinTrailingZeros(Step);
if (TZ)
return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
: ConstantStart;
return APInt(BitWidth, 0);
}
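// Worked example (illustrative, not part of the original source): with
// ConstantStart C = 7 (0b111) and a Step known to be a multiple of 4
// (TZ = 2), D = C.trunc(2).zext(W) = 3. The caller can then rewrite
// {7,+,Step} as 3 + {4,+,Step}: every value of {4,+,Step} has its two low
// bits clear, so adding 3 cannot carry out of those bits and the outer
// addition cannot wrap, signed or unsigned.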
const SCEV *
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scZeroExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all zero bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getUnsignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
return getTruncateOrZeroExtend(X, Ty, Depth);
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
// this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
if (!AR->hasNoUnsignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for overflow.
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step,
SCEV::FlagAnyWrap, Depth + 1);
const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul,
SCEV::FlagAnyWrap,
Depth + 1),
WideTy, Depth + 1);
const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NUW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (ZAdd == OperandExtendedAdd) {
// Cache knowledge of AR NW, which is propagated to this AddRec.
// Negative step causes unsigned wrap, but it still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
!AC.assumptions().empty()) {
auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
if (AR->hasNoUnsignedWrap()) {
// Same as nuw case above - duplicated here to avoid a compile time
// issue. It's not clear that the order of checks matters, but
// it's one of two possible causes for a change which was
// reverted. Be conservative for the moment.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// For a negative step, we can extend the operands iff doing so only
// traverses values in the range zext([0,UINT_MAX]).
if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRangeMin(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) {
// Cache knowledge of AR NW, which is propagated to this
// AddRec. Negative step causes unsigned wrap, but it
// still can't self-wrap.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
// zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw>
// if D + (C - D + Step * n) could be proven to not unsigned wrap
// where D maximizes the number of trailing zeros of (C - D + Step * n)
if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
const APInt &C = SC->getAPInt();
const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
if (D != 0) {
const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SZExtD, SZExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNUW);
return getAddRecExpr(
getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// zext(A % B) --> zext(A) % zext(B)
{
const SCEV *LHS;
const SCEV *RHS;
if (matchURem(Op, LHS, RHS))
return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1),
getZeroExtendExpr(RHS, Ty, Depth + 1));
}
// zext(A / B) --> zext(A) / zext(B).
if (auto *Div = dyn_cast<SCEVUDivExpr>(Op))
return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1),
getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1));
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
// zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw>
if (SA->hasNoUnsignedWrap()) {
// If the addition does not unsign overflow then we can, by definition,
// commute the zero extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
// zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
// if D + (C - D + x + y + ...) could be proven to not unsigned wrap
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)
//
// Often address arithmetic contains expressions like
// (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))).
// This transformation is useful while proving that such expressions are
// equal or differ by a small constant amount, see LoadStoreVectorizer pass.
if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
if (D != 0) {
const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SZExtD, SZExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
}
if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
// zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw>
if (SM->hasNoUnsignedWrap()) {
// If the multiply does not unsign overflow then we can, by definition,
// commute the zero extension with the multiply operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SM->operands())
Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1);
}
// zext(2^K * (trunc X to iN)) to iM ->
// 2^K * (zext(trunc X to i{N-K}) to iM)<nuw>
//
// Proof:
//
// zext(2^K * (trunc X to iN)) to iM
// = zext((trunc X to iN) << K) to iM
// = zext((trunc X to i{N-K}) << K)<nuw> to iM
// (because shl removes the top K bits)
// = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM
// = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>.
//
if (SM->getNumOperands() == 2)
if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0)))
if (MulLHS->getAPInt().isPowerOf2())
if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) {
int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) -
MulLHS->getAPInt().logBase2();
Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits);
return getMulExpr(
getZeroExtendExpr(MulLHS, Ty),
getZeroExtendExpr(
getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty),
SCEV::FlagNUW, Depth + 1);
}
}
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
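// End-to-end illustration (editorial sketch, not part of the original source):
// for the loop used as the example earlier in getZeroExtendExpr(),
//   for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
// X is the i8 addrec {0,+,1} with a constant max backedge-taken count of 99.
// The overflow check above shows that Start + Step*MaxBECount == 99 still
// fits in i8, so zext of the i8 addrec to i32 is folded to the i32 addrec
// {0,+,1}<nuw>, letting later analyses reason about Y as a plain induction
// variable rather than as a zext of one.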
const SCEV *
ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1);
// sext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1);
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
ID.AddInteger(scSignExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Limit recursion depth.
if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
// sext(trunc(x)) --> sext(x) or x or trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
// It's possible the bits taken off by the truncate were all sign bits. If
// so, we should be able to simplify this further.
const SCEV *X = ST->getOperand();
ConstantRange CR = getSignedRange(X);
unsigned TruncBits = getTypeSizeInBits(ST->getType());
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
return getTruncateOrSignExtend(X, Ty, Depth);
}
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
if (SA->hasNoSignedWrap()) {
// If the addition does not sign overflow then we can, by definition,
// commute the sign extension with the addition operation.
SmallVector<const SCEV *, 4> Ops;
for (const auto *Op : SA->operands())
Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1));
return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
}
// sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...))
// if D + (C - D + x + y + ...) could be proven to not signed wrap
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)
//
// For instance, this will bring two seemingly different expressions:
// 1 + sext(5 + 20 * %x + 24 * %y) and
// sext(6 + 20 * %x + 24 * %y)
// to the same form:
// 2 + sext(4 + 20 * %x + 24 * %y)
if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
const APInt &D = extractConstantWithoutWrapping(*this, SC, SA);
if (D != 0) {
const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SSExtD, SSExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
}
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// this: for (signed char X = 0; X < 100; ++X) { int Y = X; }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
if (AR->isAffine()) {
const SCEV *Start = AR->getStart();
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
if (!AR->hasNoSignedWrap()) {
auto NewFlags = proveNoWrapViaConstantRanges(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
}
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap())
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step,
SCEV::FlagAnyWrap, Depth + 1);
const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul,
SCEV::FlagAnyWrap,
Depth + 1),
WideTy, Depth + 1);
const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1);
const SCEV *WideMaxBECount =
getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1);
const SCEV *OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// Cache knowledge of AR NSW, which is propagated to this AddRec.
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
OperandExtendedAdd =
getAddExpr(WideStart,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (SAdd == OperandExtendedAdd) {
// If AR wraps around then
//
// abs(Step) * MaxBECount > unsigned-max(AR->getType())
// => SAdd != OperandExtendedAdd
//
// Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
// (SAdd == OperandExtendedAdd => AR is NW)
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this,
Depth + 1),
getZeroExtendExpr(Step, Ty, Depth + 1), L,
AR->getNoWrapFlags());
}
}
}
auto NewFlags = proveNoSignedWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
if (AR->hasNoSignedWrap()) {
// Same as nsw case above - duplicated here to avoid a compile time
// issue. It's not clear that the order of checks matters, but
// it's one of two possible causes for a change which was
// reverted. Be conservative for the moment.
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
// sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw>
// if D + (C - D + Step * n) could be proven to not signed wrap
// where D maximizes the number of trailing zeros of (C - D + Step * n)
if (const auto *SC = dyn_cast<SCEVConstant>(Start)) {
const APInt &C = SC->getAPInt();
const APInt &D = extractConstantWithoutWrapping(*this, C, Step);
if (D != 0) {
const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth);
const SCEV *SResidual =
getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags());
const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1);
return getAddExpr(SSExtD, SSExtR,
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW),
Depth + 1);
}
}
if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), SCEV::FlagNSW);
return getAddRecExpr(
getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1),
getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags());
}
}
// If the input value is provably positive and we could not simplify
// away the sext build a zext instead.
if (isKnownNonNegative(Op))
return getZeroExtendExpr(Op, Ty, Depth + 1);
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
/// getAnyExtendExpr - Return a SCEV for the given operand extended with
/// unspecified bits out to the given type.
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
"This is not a conversion to a SCEVable type!");
Ty = getEffectiveSCEVType(Ty);
// Sign-extend negative constants.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
if (SC->getAPInt().isNegative())
return getSignExtendExpr(Op, Ty);
// Peel off a truncate cast.
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
const SCEV *NewOp = T->getOperand();
if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
return getAnyExtendExpr(NewOp, Ty);
return getTruncateOrNoop(NewOp, Ty);
}
// Next try a zext cast. If the cast is folded, use it.
const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
if (!isa<SCEVZeroExtendExpr>(ZExt))
return ZExt;
// Next try a sext cast. If the cast is folded, use it.
const SCEV *SExt = getSignExtendExpr(Op, Ty);
if (!isa<SCEVSignExtendExpr>(SExt))
return SExt;
// Force the cast to be folded into the operands of an addrec.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Ops;
for (const SCEV *Op : AR->operands())
Ops.push_back(getAnyExtendExpr(Op, Ty));
return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
}
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
// Absent any other information, use the zext cast value.
return ZExt;
}
/// Process the given Ops list, which is a list of operands to be added under
/// the given scale, update the given map. This is a helper function for
/// getAddRecExpr. As an example of what it does, given a sequence of operands
/// that would form an add expression like this:
///
/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
///
/// and add 13 + A*B*29 to AccumulatedConstant.
/// This will allow getAddRecExpr to produce this:
///
/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
///
/// This form often exposes folding opportunities that are hidden in
/// the original operand list.
///
/// Return true iff it appears that any interesting folding opportunities
/// may be exposed. This helps getAddRecExpr short-circuit extra work in
/// the common case where no interesting opportunities are present, and
/// is also used as a check to avoid infinite recursion.
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
SmallVectorImpl<const SCEV *> &NewOps,
APInt &AccumulatedConstant,
const SCEV *const *Ops, size_t NumOperands,
const APInt &Scale,
ScalarEvolution &SE) {
bool Interesting = false;
// Iterate over the add operands. They are sorted, with constants first.
unsigned i = 0;
while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
++i;
// Pull a buried constant out to the outside.
if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
Interesting = true;
AccumulatedConstant += Scale * C->getAPInt();
}
// Next comes everything else. We're especially interested in multiplies
// here, but they're in the middle, so just visit the rest with one loop.
for (; i != NumOperands; ++i) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();
if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
// A multiplication of a constant with another add; recurse.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
Interesting |=
CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Add->op_begin(), Add->getNumOperands(),
NewScale, SE);
} else {
// A multiplication of a constant with some other value. Update
// the map.
SmallVector<const SCEV *, 4> MulOps(drop_begin(Mul->operands()));
const SCEV *Key = SE.getMulExpr(MulOps);
auto Pair = M.insert({Key, NewScale});
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += NewScale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
} else {
// An ordinary operand. Update the map.
std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
M.insert({Ops[i], Scale});
if (Pair.second) {
NewOps.push_back(Pair.first->first);
} else {
Pair.first->second += Scale;
// The map already had an entry for this value, which may indicate
// a folding opportunity.
Interesting = true;
}
}
}
return Interesting;
}
bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
const SCEV *LHS, const SCEV *RHS) {
const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
SCEV::NoWrapFlags, unsigned);
switch (BinOp) {
default:
llvm_unreachable("Unsupported binary op");
case Instruction::Add:
Operation = &ScalarEvolution::getAddExpr;
break;
case Instruction::Sub:
Operation = &ScalarEvolution::getMinusSCEV;
break;
case Instruction::Mul:
Operation = &ScalarEvolution::getMulExpr;
break;
}
const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
Signed ? &ScalarEvolution::getSignExtendExpr
: &ScalarEvolution::getZeroExtendExpr;
// Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
auto *NarrowTy = cast<IntegerType>(LHS->getType());
auto *WideTy =
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
const SCEV *A = (this->*Extension)(
(this->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0), WideTy, 0);
const SCEV *B = (this->*Operation)((this->*Extension)(LHS, WideTy, 0),
(this->*Extension)(RHS, WideTy, 0),
SCEV::FlagAnyWrap, 0);
return A == B;
}
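// Illustrative sketch (editorial, not part of the original source): for an i8
// add with Signed == false, the code above compares
//   A = zext i8 (LHS + RHS) to i16
//   B = (zext i8 LHS to i16) + (zext i8 RHS to i16)
// These denote the same value exactly when the narrow addition cannot wrap
// unsigned. Because SCEV expressions are uniqued, the final "A == B" pointer
// comparison succeeds precisely when SCEV manages to fold both sides to the
// same node, so the result is conservative: "false" only means the absence of
// overflow could not be proven.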
std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
ScalarEvolution::getStrengthenedNoWrapFlagsFromBinOp(
const OverflowingBinaryOperator *OBO) {
SCEV::NoWrapFlags Flags = SCEV::NoWrapFlags::FlagAnyWrap;
if (OBO->hasNoUnsignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
bool Deduced = false;
if (OBO->hasNoUnsignedWrap() && OBO->hasNoSignedWrap())
return {Flags, Deduced};
if (OBO->getOpcode() != Instruction::Add &&
OBO->getOpcode() != Instruction::Sub &&
OBO->getOpcode() != Instruction::Mul)
return {Flags, Deduced};
const SCEV *LHS = getSCEV(OBO->getOperand(0));
const SCEV *RHS = getSCEV(OBO->getOperand(1));
if (!OBO->hasNoUnsignedWrap() &&
willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
/* Signed */ false, LHS, RHS)) {
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
Deduced = true;
}
if (!OBO->hasNoSignedWrap() &&
willNotOverflow((Instruction::BinaryOps)OBO->getOpcode(),
/* Signed */ true, LHS, RHS)) {
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
Deduced = true;
}
return {Flags, Deduced};
}
// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
const ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
using OBO = OverflowingBinaryOperator;
bool CanAnalyze =
Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
(void)CanAnalyze;
assert(CanAnalyze && "don't call from other places!");
int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
SCEV::NoWrapFlags SignOrUnsignWrap =
ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
auto IsKnownNonNegative = [&](const SCEV *S) {
return SE->isKnownNonNegative(S);
};
if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative))
Flags =
ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);
if (SignOrUnsignWrap != SignOrUnsignMask &&
(Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 &&
isa<SCEVConstant>(Ops[0])) {
auto Opcode = [&] {
switch (Type) {
case scAddExpr:
return Instruction::Add;
case scMulExpr:
return Instruction::Mul;
default:
llvm_unreachable("Unexpected SCEV op.");
}
}();
const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt();
// (A <opcode> C) --> (A <opcode> C)<nsw> if the op doesn't sign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNSW)) {
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Opcode, C, OBO::NoSignedWrap);
if (NSWRegion.contains(SE->getSignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
}
// (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow.
if (!(SignOrUnsignWrap & SCEV::FlagNUW)) {
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Opcode, C, OBO::NoUnsignedWrap);
if (NUWRegion.contains(SE->getUnsignedRange(Ops[1])))
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
}
}
return Flags;
}
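// Worked example (illustrative, not part of the original source): strengthening
// (1 + %x) on i8. For Opcode == Instruction::Add and C == 1, the guaranteed
// no-signed-wrap region is the set of values whose signed sum with 1 cannot
// overflow, i.e. [-128, 127). If getSignedRange(%x) is, say, [0, 100), it is
// contained in that region and FlagNSW is added; the analogous unsigned
// region [0, 255) is used for FlagNUW.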
bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) {
return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader());
}
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
assert(!(OrigFlags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
unsigned NumPtrs = count_if(
Ops, [](const SCEV *Op) { return Op->getType()->isPointerTy(); });
assert(NumPtrs <= 1 && "add has at most one pointer operand");
#endif
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we are left with a constant zero being added, strip it off.
if (LHSC->getValue()->isZero()) {
Ops.erase(Ops.begin());
--Idx;
}
if (Ops.size() == 1) return Ops[0];
}
// Delay expensive flag strengthening until necessary.
auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scAddExpr, Ops, OrigFlags);
};
// Limit recursion calls depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) {
// Don't strengthen flags if we have no new information.
SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
Add->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into a multiply expression. Since we
// sorted the list, these values are required to be adjacent.
Type *Ty = Ops[0]->getType();
bool FoundMatch = false;
for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// Scan ahead to count how many equal operands there are.
unsigned Count = 2;
while (i+Count != e && Ops[i+Count] == Ops[i])
++Count;
// Merge the values into a multiply.
const SCEV *Scale = getConstant(Ty, Count);
const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == Count)
return Mul;
Ops[i] = Mul;
Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
--i; e -= Count - 1;
FoundMatch = true;
}
if (FoundMatch)
return getAddExpr(Ops, OrigFlags, Depth + 1);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
// folded. e.g., n*trunc(x) + m*trunc(y) --> trunc(trunc(n)*x + trunc(m)*y)
// if the contents of the resulting outer trunc fold to something simple.
auto FindTruncSrcType = [&]() -> Type * {
// We're ultimately looking to fold an addrec of truncs and muls of only
// constants and truncs, so if we find any other types of SCEV
// as operands of the addrec then we bail and return nullptr here.
// Otherwise, we return the type of the operand of a trunc that we find.
if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx]))
return T->getOperand()->getType();
if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1);
if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp))
return T->getOperand()->getType();
}
return nullptr;
};
if (auto *SrcType = FindTruncSrcType()) {
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeOps.push_back(T->getOperand());
} else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
LargeOps.push_back(getAnyExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
SmallVector<const SCEV *, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
if (T->getOperand()->getType() != SrcType) {
Ok = false;
break;
}
LargeMulOps.push_back(T->getOperand());
} else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
} else {
Ok = false;
break;
}
}
if (Ok)
LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1));
} else {
Ok = false;
break;
}
}
if (Ok) {
// Evaluate the expression in the larger type.
const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, Ty);
}
}
if (Ops.size() == 2) {
// Check if we have an expression of the form ((X + C1) - C2), where C1 and
// C2 can be folded in a way that allows retaining wrapping flags of (X +
// C1).
const SCEV *A = Ops[0];
const SCEV *B = Ops[1];
auto *AddExpr = dyn_cast<SCEVAddExpr>(B);
auto *C = dyn_cast<SCEVConstant>(A);
if (AddExpr && C && isa<SCEVConstant>(AddExpr->getOperand(0))) {
auto C1 = cast<SCEVConstant>(AddExpr->getOperand(0))->getAPInt();
auto C2 = C->getAPInt();
SCEV::NoWrapFlags PreservedFlags = SCEV::FlagAnyWrap;
APInt ConstAdd = C1 + C2;
auto AddFlags = AddExpr->getNoWrapFlags();
// Adding a smaller constant is NUW if the original AddExpr was NUW.
if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNUW) ==
SCEV::FlagNUW &&
ConstAdd.ule(C1)) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW);
}
// Adding a constant with the same sign and small magnitude is NSW, if the
// original AddExpr was NSW.
if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNSW) ==
SCEV::FlagNSW &&
C1.isSignBitSet() == ConstAdd.isSignBitSet() &&
ConstAdd.abs().ule(C1.abs())) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNSW);
}
if (PreservedFlags != SCEV::FlagAnyWrap) {
SmallVector<const SCEV *, 4> NewOps(AddExpr->op_begin(),
AddExpr->op_end());
NewOps[0] = getConstant(ConstAdd);
return getAddExpr(NewOps, PreservedFlags);
}
}
}
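// An illustrative example of the flag-preservation rules above (hypothetical
// operands): for (5 + X)<nuw> plus the constant -2, ConstAdd = 3 and 3 ule 5,
// so the result may be rebuilt as (3 + X)<nuw>; for (5 + X)<nuw> plus +2,
// ConstAdd = 7 exceeds 5 unsigned and the nuw rule does not fire.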
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
++Idx;
// If there are add operands they would be next.
if (Idx < Ops.size()) {
bool DeletedAdd = false;
// If the original flags and all inlined SCEVAddExprs are NUW, use the
// common NUW flag for expression after inlining. Other flags cannot be
// preserved, because they may depend on the original order of operations.
SCEV::NoWrapFlags CommonFlags = maskFlags(OrigFlags, SCEV::FlagNUW);
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
if (Ops.size() > AddOpsInlineThreshold ||
Add->getNumOperands() > AddOpsInlineThreshold)
break;
// If we have an add, expand the add operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Add->op_begin(), Add->op_end());
DeletedAdd = true;
CommonFlags = maskFlags(CommonFlags, Add->getNoWrapFlags());
}
// If we deleted at least one add, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedAdd)
return getAddExpr(Ops, CommonFlags, Depth + 1);
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// Check to see if there are any folding opportunities present with
// operands multiplied by constant values.
if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
uint64_t BitWidth = getTypeSizeInBits(Ty);
DenseMap<const SCEV *, APInt> M;
SmallVector<const SCEV *, 8> NewOps;
APInt AccumulatedConstant(BitWidth, 0);
if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
Ops.data(), Ops.size(),
APInt(BitWidth, 1), *this)) {
struct APIntCompare {
bool operator()(const APInt &LHS, const APInt &RHS) const {
return LHS.ult(RHS);
}
};
// Some interesting folding opportunity is present, so it's worthwhile to
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
for (const SCEV *NewOp : NewOps)
MulOpLists[M.find(NewOp)->second].push_back(NewOp);
// Re-generate the operands list.
Ops.clear();
if (AccumulatedConstant != 0)
Ops.push_back(getConstant(AccumulatedConstant));
for (auto &MulOp : MulOpLists) {
if (MulOp.first == 1) {
Ops.push_back(getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1));
} else if (MulOp.first != 0) {
Ops.push_back(getMulExpr(
getConstant(MulOp.first),
getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1));
}
}
if (Ops.empty())
return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
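// An illustrative sketch of the regrouping above (hypothetical operands; the
// exact scales come from CollectAddOperandsWithScales): an input such as
//   3*A + 3*B + 5*C
// is grouped by constant scale and re-emitted as
//   3*(A + B) + 5*C
// so each scale is multiplied in only once.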
// If we are adding something to a multiply expression, make sure the
// something is not already an operand of the multiply. If so, merge it into
// the multiply.
for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
if (isa<SCEVConstant>(MulOpSCEV))
continue;
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
if (MulOpSCEV == Ops[AddOp]) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV,
SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
Ops.erase(Ops.begin()+Idx-1);
} else {
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+AddOp-1);
}
Ops.push_back(OuterMul);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Check this multiply against other multiplies being added together.
for (unsigned OtherMulIdx = Idx+1;
OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
++OtherMulIdx) {
const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
// If MulOp occurs in OtherMul, we can fold the two multiplies
// together.
for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
OMulOp != e; ++OMulOp)
if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
Mul->op_begin()+MulOp);
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_begin()+OMulOp);
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1);
}
SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
const SCEV *InnerMulSum =
getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum,
SCEV::FlagAnyWrap, Depth + 1);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
Ops.push_back(OuterMul);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
}
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// Compute nowrap flags for the addition of the loop-invariant ops and
// the addrec. Temporarily push it as an operand for that purpose.
LIOps.push_back(AddRec);
SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
LIOps.pop_back();
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
// This follows from the fact that the no-wrap flags on the outer add
// expression are applicable on the 0th iteration, when the add recurrence
// will be equal to its start value.
AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
// Always propagate NW.
Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, add the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
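// An illustrative example of the fold above (hypothetical operands): adding
// a loop-invariant 5 to {3,+,1}<L> yields {8,+,1}<L>; the invariant value is
// folded into the start only, and the step is left unchanged.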
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
// We expect the AddRecExpr's to be sorted in reverse dominance order,
// so that the 1st found AddRecExpr is dominated by all others.
assert(DT.dominates(
cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(),
AddRec->getLoop()->getHeader()) &&
"AddRecExprs are not sorted in reverse dominance order?");
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (OtherAddRec->getLoop() == AddRecLoop) {
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
if (i >= AddRecOps.size()) {
AddRecOps.append(OtherAddRec->op_begin()+i,
OtherAddRec->op_end());
break;
}
SmallVector<const SCEV *, 2> TwoOps = {
AddRecOps[i], OtherAddRec->getOperand(i)};
AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
}
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
}
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
}
const SCEV *
ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
for (const SCEV *Op : Ops)
ID.AddPointer(Op);
void *IP = nullptr;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
}
const SCEV *
ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
ID.AddPointer(L);
void *IP = nullptr;
SCEVAddRecExpr *S =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator)
SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
setNoWrapFlags(S, Flags);
return S;
}
const SCEV *
ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
}
S->setNoWrapFlags(Flags);
return S;
}
static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
uint64_t k = i*j;
if (j > 1 && k / j != i) Overflow = true;
return k;
}
/// Compute the result of "n choose k", the binomial coefficient. If an
/// intermediate computation overflows, Overflow will be set and the return will
/// be garbage. Overflow is not cleared on absence of overflow.
static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
// We use the multiplicative formula:
// n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
// At each iteration, we take the i-th term of the numerator and divide by the
// i-th smallest term of the denominator. This division will always produce an
// integral result, and helps reduce the chance of overflow in the
// intermediate computations. However, we can still overflow even when the
// final result would fit.
if (n == 0 || n == k) return 1;
if (k > n) return 0;
if (k > n/2)
k = n-k;
uint64_t r = 1;
for (uint64_t i = 1; i <= k; ++i) {
r = umul_ov(r, n-(i-1), Overflow);
r /= i;
}
return r;
}
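// A worked example (hypothetical inputs): Choose(6, 2) performs
//   r = umul_ov(1, 6, Overflow) / 1 == 6
//   r = umul_ov(6, 5, Overflow) / 2 == 15
// matching C(6,2) = 15, and leaves Overflow untouched because no product
// exceeded 64 bits.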
/// Determine if any of the operands in this SCEV are a constant or if
/// any of the add or multiply expressions in this SCEV contain a constant.
static bool containsConstantInAddMulChain(const SCEV *StartExpr) {
struct FindConstantInAddMulChain {
bool FoundConstant = false;
bool follow(const SCEV *S) {
FoundConstant |= isa<SCEVConstant>(S);
return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S);
}
bool isDone() const {
return FoundConstant;
}
};
FindConstantInAddMulChain F;
SCEVTraversal<FindConstantInAddMulChain> ST(F);
ST.visitAll(StartExpr);
return F.FoundConstant;
}
/// Get a canonical multiply expression, or something simpler if possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags OrigFlags,
unsigned Depth) {
assert(OrigFlags == maskFlags(OrigFlags, SCEV::FlagNUW | SCEV::FlagNSW) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = Ops[0]->getType();
assert(!ETy->isPointerTy());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(Ops[i]->getType() == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
Ops[0] = getConstant(LHSC->getAPInt() * RHSC->getAPInt());
if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
LHSC = cast<SCEVConstant>(Ops[0]);
}
// If we have a multiply of zero, it will always be zero.
if (LHSC->getValue()->isZero())
return LHSC;
// If we are left with a constant one being multiplied, strip it off.
if (LHSC->getValue()->isOne()) {
Ops.erase(Ops.begin());
--Idx;
}
if (Ops.size() == 1)
return Ops[0];
}
// Delay expensive flag strengthening until necessary.
auto ComputeFlags = [this, OrigFlags](const ArrayRef<const SCEV *> Ops) {
return StrengthenNoWrapFlags(this, scMulExpr, Ops, OrigFlags);
};
// Limit the recursion depth.
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
// Don't strengthen flags if we have no new information.
SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
Mul->setNoWrapFlags(ComputeFlags(Ops));
return S;
}
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
if (Ops.size() == 2) {
// C1*(C2+V) -> C1*C2 + C1*V
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
// If any of Add's ops are Adds or Muls with a constant, apply this
// transformation as well.
//
// TODO: There are some cases where this transformation is not
// profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of
// this transformation should be narrowed down.
if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add))
return getAddExpr(getMulExpr(LHSC, Add->getOperand(0),
SCEV::FlagAnyWrap, Depth + 1),
getMulExpr(LHSC, Add->getOperand(1),
SCEV::FlagAnyWrap, Depth + 1),
SCEV::FlagAnyWrap, Depth + 1);
if (Ops[0]->isAllOnesValue()) {
// If we have a mul by -1 of an add, try distributing the -1 among the
// add operands.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
SmallVector<const SCEV *, 4> NewOps;
bool AnyFolded = false;
for (const SCEV *AddOp : Add->operands()) {
const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap,
Depth + 1);
if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
NewOps.push_back(Mul);
}
if (AnyFolded)
return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1);
} else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
// Negation preserves a recurrence's no self-wrap property.
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *AddRecOp : AddRec->operands())
Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
}
}
}
}
// Skip over the add expression until we get to a multiply.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
// If there are mul operands inline them all into this expression.
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
if (Ops.size() > MulOpsInlineThreshold)
break;
// If we have a mul, expand the mul operands onto the end of the
// operands list.
Ops.erase(Ops.begin()+Idx);
Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
// If we deleted at least one mul, we added operands to the end of the
// list, and they are not necessarily sorted. Recurse to resort and
// resimplify any operands we just acquired.
if (DeletedMul)
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// If there are any add recurrences in the operands list, see if any other
// added values are loop invariant. If so, we can fold them into the
// recurrence.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
++Idx;
// Scan over all recurrences, trying to fold loop invariants into them.
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector
// if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
}
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
SCEV::FlagAnyWrap, Depth + 1));
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
//
// The no-self-wrap property cannot be guaranteed after changing the step
// size, but it will be inferred if either NUW or NSW is true.
SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
const SCEV *NewRec = getAddRecExpr(
NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
// Otherwise, multiply the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
break;
}
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Okay, if there weren't any loop invariants to be folded, check to see
// if there are multiple AddRec's with the same loop induction variable
// being multiplied together. If so, we can fold them.
// {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
// = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
// choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
// ]]],+,...up to x=2n}.
// Note that the arguments to choose() are always integers with values
// known at compile time, never SCEV objects.
//
// The implementation avoids pointless extra computations when the two
// addrecs are of different length (mathematically, it's equivalent to
// an infinite stream of zeros on the right).
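// For the common case of two affine recurrences, the general formula above
// reduces to the illustrative identity
//   {A,+,B}<L> * {C,+,D}<L> = {A*C,+,A*D+B*C+B*D,+,2*B*D}<L>
// which is what the nested loops below compute for x = 0, 1, 2.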
bool OpsModified = false;
for (unsigned OtherIdx = Idx+1;
OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
// Limit max number of arguments to avoid creation of unreasonably big
// SCEVAddRecs with very complex operands.
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
MaxAddRecSize || hasHugeExpression({AddRec, OtherAddRec}))
continue;
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
z < ze && !Overflow; ++z) {
uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
uint64_t Coeff;
if (LargerThan64Bits)
Coeff = umul_ov(Coeff1, Coeff2, Overflow);
else
Coeff = Coeff1*Coeff2;
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
SCEV::FlagAnyWrap, Depth + 1));
}
}
if (SumOps.empty())
SumOps.push_back(getZero(Ty));
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
OpsModified = true;
AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
if (!AddRec)
break;
}
}
if (OpsModified)
return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
}
// Okay, it looks like we really DO need a mul expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
}
/// Represents an unsigned remainder expression based on unsigned division.
const SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(getEffectiveSCEVType(LHS->getType()) ==
getEffectiveSCEVType(RHS->getType()) &&
"SCEVURemExpr operand types don't match!");
// Short-circuit easy cases
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
// If constant is one, the result is trivial
if (RHSC->getValue()->isOne())
return getZero(LHS->getType()); // X urem 1 --> 0
// If constant is a power of two, fold into a zext(trunc(LHS)).
if (RHSC->getAPInt().isPowerOf2()) {
Type *FullTy = LHS->getType();
Type *TruncTy =
IntegerType::get(getContext(), RHSC->getAPInt().logBase2());
return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy);
}
}
// Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y)
const SCEV *UDiv = getUDivExpr(LHS, RHS);
const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW);
return getMinusSCEV(LHS, Mult, SCEV::FlagNUW);
}
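// An illustrative example of the power-of-two case above (hypothetical
// 32-bit %x): (%x urem 8) becomes zext(trunc %x to i3) to i32, i.e. the low
// three bits of %x; a non-power-of-two divisor falls through to the generic
// %x - ((%x udiv %y) * %y) form.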
/// Get a canonical unsigned division expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
const SCEV *RHS) {
assert(!LHS->getType()->isPointerTy() &&
"SCEVUDivExpr operand can't be pointer!");
assert(LHS->getType() == RHS->getType() &&
"SCEVUDivExpr operand types don't match!");
FoldingSetNodeID ID;
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
// 0 udiv Y == 0
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS))
if (LHSC->getValue()->isZero())
return LHS;
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (RHSC->getValue()->isOne())
return LHS; // X udiv 1 --> x
// If the denominator is zero, the result of the udiv is undefined. Don't
// try to analyze it, because the resolution chosen here may differ from
// the resolution chosen in other parts of the compiler.
if (!RHSC->getValue()->isZero()) {
// Determine if the division can be folded into the operands of its LHS.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
unsigned LZ = RHSC->getAPInt().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getAPInt().isPowerOf2())
++MaxShiftAmt;
IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
// {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
const APInt &StepInt = Step->getAPInt();
const APInt &DivInt = RHSC->getAPInt();
if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AR->operands())
Operands.push_back(getUDivExpr(Op, RHS));
return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);
}
/// Get a canonical UDivExpr for a recurrence.
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
if (StartC && !DivInt.urem(StepInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop(), SCEV::FlagAnyWrap)) {
const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
if (StartRem != 0) {
const SCEV *NewLHS =
getAddRecExpr(getConstant(StartInt - StartRem), Step,
AR->getLoop(), SCEV::FlagNW);
if (LHS != NewLHS) {
LHS = NewLHS;
// Reset the ID to include the new LHS, and check if it is
// already cached.
ID.clear();
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
return S;
}
}
}
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : M->operands())
Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
// Find an operand that's safely divisible.
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
const SCEV *Op = M->getOperand(i);
const SCEV *Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
Operands = SmallVector<const SCEV *, 4>(M->operands());
Operands[i] = Div;
return getMulExpr(Operands);
}
}
}
// (A/B)/C --> A/(B*C) if safe and B*C can be folded.
if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) {
if (auto *DivisorConstant =
dyn_cast<SCEVConstant>(OtherDiv->getRHS())) {
bool Overflow = false;
APInt NewRHS =
DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow);
if (Overflow) {
return getConstant(RHSC->getType(), 0, false);
}
return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS));
}
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : A->operands())
Operands.push_back(getZeroExtendExpr(Op, ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
Operands.clear();
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
if (isa<SCEVUDivExpr>(Op) ||
getMulExpr(Op, RHS) != A->getOperand(i))
break;
Operands.push_back(Op);
}
if (Operands.size() == A->getNumOperands())
return getAddExpr(Operands);
}
}
// Fold if both operands are constant.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
Constant *LHSCV = LHSC->getValue();
Constant *RHSCV = RHSC->getValue();
return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
RHSCV)));
}
}
}
// The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
// changes). Make sure we get a new one.
IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getAPInt().abs();
APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
uint32_t BBW = B.getBitWidth();
if (ABW > BBW)
B = B.zext(ABW);
else if (ABW < BBW)
A = A.zext(BBW);
return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B));
}
/// Get a canonical unsigned division expression, or something simpler if
/// possible. There is no representation for an exact udiv in SCEV IR, but we
/// can attempt to remove factors from the LHS and RHS. We can't do this when
/// it's not exact because the udiv may be clearing bits.
const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
const SCEV *RHS) {
// TODO: we could try to find factors in all sorts of things, but for now we
// just deal with u/exact (multiply, constant). See SCEVDivision towards the
// end of this file for inspiration.
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
if (!Mul || !Mul->hasNoUnsignedWrap())
return getUDivExpr(LHS, RHS);
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
// If the mulexpr multiplies by a constant, then that constant must be the
// first element of the mulexpr.
if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
if (LHSCst == RHSCst) {
SmallVector<const SCEV *, 2> Operands(drop_begin(Mul->operands()));
return getMulExpr(Operands);
}
// We can't just assume that LHSCst divides RHSCst cleanly; it could be
// that there's a factor provided by one of the other terms. We need to
// check.
APInt Factor = gcd(LHSCst, RHSCst);
if (!Factor.isIntN(1)) {
LHSCst =
cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor)));
RHSCst =
cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));
SmallVector<const SCEV *, 2> Operands;
Operands.push_back(LHSCst);
Operands.append(Mul->op_begin() + 1, Mul->op_end());
LHS = getMulExpr(Operands);
RHS = RHSCst;
Mul = dyn_cast<SCEVMulExpr>(LHS);
if (!Mul)
return getUDivExactExpr(LHS, RHS);
}
}
}
for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
if (Mul->getOperand(i) == RHS) {
SmallVector<const SCEV *, 2> Operands;
Operands.append(Mul->op_begin(), Mul->op_begin() + i);
Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
return getMulExpr(Operands);
}
}
return getUDivExpr(LHS, RHS);
}
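// Illustrative examples for the exact-division factoring above (hypothetical
// operands): (4 * X)<nuw> /u 4 drops the matching constant and returns X,
// while (6 * X)<nuw> /u 4 uses gcd(6, 4) = 2 to reduce the query to
// (3 * X) /u 2 before falling back to an ordinary udiv.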
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
const Loop *L,
SCEV::NoWrapFlags Flags) {
SmallVector<const SCEV *, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
Operands.append(StepChrec->op_begin(), StepChrec->op_end());
return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
}
Operands.push_back(Step);
return getAddRecExpr(Operands, L, Flags);
}
/// Get an add recurrence expression for the specified loop. Simplify the
/// expression as much as possible.
const SCEV *
ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
const Loop *L, SCEV::NoWrapFlags Flags) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
assert(!Operands[i]->getType()->isPointerTy() && "Step must be integer");
}
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
assert(isLoopInvariant(Operands[i], L) &&
"SCEVAddRecExpr operand is not loop-invariant!");
#endif
if (Operands.back()->isZero()) {
Operands.pop_back();
return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
}
// It's tempting to call getConstantMaxBackedgeTakenCount here and
// use that information to infer NUW and NSW flags. However, computing a
// BE count requires calling getAddRecExpr, so we may not yet have a
// meaningful BE count at this point (and if we don't, we'd be stuck
// with a SCEVCouldNotCompute as the cached BE count).
Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
if (L->contains(NestedLoop)
? (L->getLoopDepth() < NestedLoop->getLoopDepth())
: (!NestedLoop->contains(L) &&
DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->operands());
Operands[0] = NestedAR->getStart();
// AddRecs require their operands be loop-invariant with respect to their
// loops. Don't perform this transformation if it would break this
// requirement.
bool AllInvariant = all_of(
Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); });
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
// The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
// inner recurrence has the same property.
SCEV::NoWrapFlags OuterFlags =
maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) {
return isLoopInvariant(Op, NestedLoop);
});
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
// The inner recurrence keeps its NW flag but only keeps NUW/NSW if
// the outer recurrence has the same property.
SCEV::NoWrapFlags InnerFlags =
maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
}
}
// Reset Operands to its original state.
Operands[0] = NestedAR;
}
}
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
return getOrCreateAddRecExpr(Operands, L, Flags);
}
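// An illustrative sketch of the nesting canonicalization above (hypothetical
// loops Lo and Li, with Lo containing the deeper Li): a request for
//   {{S,+,X}<Li>,+,Y}<Lo>
// is re-nested as
//   {{S,+,Y}<Lo>,+,X}<Li>
// provided every operand stays invariant in its respective loop; otherwise
// the operands are restored and the original form is kept.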
const SCEV *
ScalarEvolution::getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs) {
const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
SCEV::NoWrapFlags OffsetWrap =
GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
Type *CurTy = GEP->getType();
bool FirstIter = true;
SmallVector<const SCEV *, 4> Offsets;
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
// For a struct, add the member offset.
ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
unsigned FieldNo = Index->getZExtValue();
const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo);
Offsets.push_back(FieldOffset);
// Update CurTy to the type of the field at Index.
CurTy = STy->getTypeAtIndex(Index);
} else {
// Update CurTy to its element type.
if (FirstIter) {
assert(isa<PointerType>(CurTy) &&
"The first index of a GEP indexes a pointer");
CurTy = GEP->getSourceElementType();
FirstIter = false;
} else {
CurTy = GetElementPtrInst::getTypeAtIndex(CurTy, (uint64_t)0);
}
// For an array, add the element offset, explicitly scaled.
const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy);
// Getelementptr indices are signed.
IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy);
// Multiply the index by the element size to compute the element offset.
const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, OffsetWrap);
Offsets.push_back(LocalOffset);
}
}
// Handle degenerate case of GEP without offsets.
if (Offsets.empty())
return BaseExpr;
// Add the offsets together, assuming nsw if inbounds.
const SCEV *Offset = getAddExpr(Offsets, OffsetWrap);
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
? SCEV::FlagNUW : SCEV::FlagAnyWrap;
return getAddExpr(BaseExpr, Offset, BaseWrap);
}
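// An illustrative example of the offset computation above (hypothetical IR):
// for "getelementptr inbounds i32, i32* %p, i64 %i" the single array index
// contributes (%i sign-extended or truncated to the index type) * 4, built
// with nsw because of inbounds; the final base-plus-offset add only gets nuw
// when the offset is additionally known to be non-negative.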
std::tuple<SCEV *, FoldingSetNodeID, void *>
ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
void *IP = nullptr;
ID.AddInteger(SCEVType);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
return std::tuple<SCEV *, FoldingSetNodeID, void *>(
UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}
const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
SCEV::NoWrapFlags Flags = IsNSW ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
}
const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"Operand types don't match!");
assert(Ops[0]->getType()->isPointerTy() ==
Ops[i]->getType()->isPointerTy() &&
"min/max should be consistently pointerish");
}
#endif
bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
// Sort by complexity; this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
// Check if we have created the same expression before.
if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
return S;
}
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
if (Kind == scSMaxExpr)
return APIntOps::smax(LHS, RHS);
else if (Kind == scSMinExpr)
return APIntOps::smin(LHS, RHS);
else if (Kind == scUMaxExpr)
return APIntOps::umax(LHS, RHS);
else if (Kind == scUMinExpr)
return APIntOps::umin(LHS, RHS);
llvm_unreachable("Unknown SCEV min/max opcode");
};
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(
getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
if (IsMax ? IsMinV : IsMaxV) {
// If we are left with a constant minimum(/maximum)-int, strip it off.
Ops.erase(Ops.begin());
--Idx;
} else if (IsMax ? IsMaxV : IsMinV) {
// If we have a max(/min) with a constant maximum(/minimum)-int,
// it will always be the extremum.
return LHSC;
}
if (Ops.size() == 1) return Ops[0];
}
// Find the first operation of the same kind
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
++Idx;
// Check to see if one of the operands is of the same kind. If so, expand its
// operands onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
bool DeletedAny = false;
while (Ops[Idx]->getSCEVType() == Kind) {
const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
Ops.erase(Ops.begin()+Idx);
Ops.append(SMME->op_begin(), SMME->op_end());
DeletedAny = true;
}
if (DeletedAny)
return getMinMaxExpr(Kind, Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
llvm::CmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
llvm::CmpInst::Predicate LEPred =
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
if (Ops[i] == Ops[i + 1] ||
isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
// X op Y op Y --> X op Y
// X op Y --> X, if we know X, Y are ordered appropriately
Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
--i;
--e;
} else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
Ops[i + 1])) {
// X op Y --> Y, if we know X, Y are ordered appropriately
Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
--i;
--e;
}
}
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced smax down to nothing!");
// Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
void *IP;
std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
if (ExistingSCEV)
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
SCEV *S = new (SCEVAllocator)
SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
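// An illustrative example combining the steps above (hypothetical operands):
// smax(5, smax(X, 3)) has the nested smax inlined and, via the recursive
// call, its constants folded, leaving smax(5, X); the known-ordering checks
// could reduce it further to a single operand if 5 and X were provably
// ordered.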
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getSMaxExpr(Ops);
}
const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scSMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
return getUMaxExpr(Ops);
}
const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scUMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getSMinExpr(Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scSMinExpr, Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getUMinExpr(Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
return getMinMaxExpr(scUMinExpr, Ops);
}
const SCEV *
ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
ScalableVectorType *ScalableTy) {
Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
Constant *One = ConstantInt::get(IntTy, 1);
Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
// Note that the expression we created is the final expression; we don't
// want to simplify it any further. Also, if we call a normal getSCEV(),
// we'll end up in an endless recursion. So just create an SCEVUnknown.
return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
}
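// Illustrative note: for a hypothetical <vscale x 4 x i32>, the expression
// built above is ptrtoint(gep(<vscale x 4 x i32>* null, 1)), a SCEVUnknown
// standing for vscale * 16 bytes, i.e. a size that is only known at run time.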
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
return getConstant(
IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
// Don't attempt to do anything other than create a SCEVUnknown object
// here. createSCEV only calls getUnknown after checking for all other
// interesting possibilities, and any other code that calls getUnknown
// is doing so in order to hide a value from SCEV canonicalization.
FoldingSetNodeID ID;
ID.AddInteger(scUnknown);
ID.AddPointer(V);
void *IP = nullptr;
if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
assert(cast<SCEVUnknown>(S)->getValue() == V &&
"Stale SCEVUnknown in uniquing map!");
return S;
}
SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
FirstUnknown);
FirstUnknown = cast<SCEVUnknown>(S);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
//===----------------------------------------------------------------------===//
// Basic SCEV Analysis and PHI Idiom Recognition Code
//
/// Test if values of the given type are analyzable within the SCEV
/// framework. This primarily includes integer types, and it can optionally
/// include pointer types if the ScalarEvolution class has access to
/// target-specific information.
bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
return Ty->isIntOrPtrTy();
}
/// Return the size in bits of the specified type, for which isSCEVable must
/// return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isPointerTy())
return getDataLayout().getIndexTypeSizeInBits(Ty);
return getDataLayout().getTypeSizeInBits(Ty);
}
/// Return a type with the same bitwidth as the given type and which represents
/// how SCEV will treat the given type, for which isSCEVable must return
/// true. For pointer types, this is the pointer index sized integer type.
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
if (Ty->isIntegerTy())
return Ty;
// The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
return getDataLayout().getIndexType(Ty);
}
Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
bool ScalarEvolution::checkValidity(const SCEV *S) const {
bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
auto *SU = dyn_cast<SCEVUnknown>(S);
return SU && SU->getValue() == nullptr;
});
return !ContainsNulls;
}
bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
HasRecMapType::iterator I = HasRecMap.find(S);
if (I != HasRecMap.end())
return I->second;
bool FoundAddRec =
SCEVExprContains(S, [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); });
HasRecMap.insert({S, FoundAddRec});
return FoundAddRec;
}
/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
/// offset I, then return {S', I}, else return {\p S, nullptr}.
static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
const auto *Add = dyn_cast<SCEVAddExpr>(S);
if (!Add)
return {S, nullptr};
if (Add->getNumOperands() != 2)
return {S, nullptr};
auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
if (!ConstOp)
return {S, nullptr};
return {Add->getOperand(1), ConstOp->getValue()};
}
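// An illustrative example (hypothetical SCEVs): splitAddExpr applied to
// (4 + %x) returns the pair {%x, 4}, while anything that is not a two-operand
// add with a leading constant, e.g. (%x * %y), comes back as {S, nullptr}.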
/// Return the ValueOffsetPair set for \p S. \p S can be represented
/// by the value and offset from any ValueOffsetPair in the set.
ScalarEvolution::ValueOffsetPairSetVector *
ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return nullptr;
#ifndef NDEBUG
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second)
assert(ValueExprMap.count(VE.first));
}
#endif
return &SI->second;
}
/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
/// cannot be used separately. eraseValueFromMap should be used to remove
/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
// Remove {V, 0} from the set of ExprValueMap[S]
if (auto *SV = getSCEVValues(S))
SV->remove({V, nullptr});
// Remove {V, Offset} from the set of ExprValueMap[Stripped]
const SCEV *Stripped;
ConstantInt *Offset;
std::tie(Stripped, Offset) = splitAddExpr(S);
if (Offset != nullptr) {
if (auto *SV = getSCEVValues(Stripped))
SV->remove({V, Offset});
}
ValueExprMap.erase(V);
}
}
/// Check whether value has nuw/nsw/exact set but SCEV does not.
/// TODO: Ideally we would check for poison recursively, but this is
/// better than nothing.
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<OverflowingBinaryOperator>(I)) {
if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
return true;
if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
return true;
}
} else if (isa<PossiblyExactOperator>(I) && I->isExact())
return true;
}
return false;
}
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
const SCEV *S = getExistingSCEV(V);
if (S == nullptr) {
S = createSCEV(V);
// During PHI resolution, it is possible to create two SCEVs for the same
// V, so we need to double-check whether V->S was inserted into
// ValueExprMap before inserting S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
ExprValueMap[S].insert({V, nullptr});
// If S == Stripped + Offset, add Stripped -> {V, Offset} into
// ExprValueMap.
const SCEV *Stripped = S;
ConstantInt *Offset = nullptr;
std::tie(Stripped, Offset) = splitAddExpr(S);
// If stripped is SCEVUnknown, don't bother to save
// Stripped -> {V, offset}. It doesn't simplify and sometimes even
// increases the complexity of the expansion code.
// If V is GetElementPtrInst, don't save Stripped -> {V, offset}
// because it may generate add/sub instead of GEP in SCEV expansion.
if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
!isa<GetElementPtrInst>(V))
ExprValueMap[Stripped].insert({V, Offset});
}
}
return S;
}
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
eraseValueFromMap(V);
forgetMemoizedResults(S);
}
return nullptr;
}
/// Return a SCEV corresponding to -V = -1*V
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMulExpr(V, getMinusOne(Ty), Flags);
}
/// If Expr computes ~A, return A else return nullptr
static const SCEV *MatchNotExpr(const SCEV *Expr) {
const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
if (!Add || Add->getNumOperands() != 2 ||
!Add->getOperand(0)->isAllOnesValue())
return nullptr;
const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
if (!AddRHS || AddRHS->getNumOperands() != 2 ||
!AddRHS->getOperand(0)->isAllOnesValue())
return nullptr;
return AddRHS->getOperand(1);
}
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
// Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
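// (This holds because x -> ~x is an order-reversing bijection; e.g.
// ~smax(~x, ~y) == smin(x, y), and likewise for the unsigned variants.)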
if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
SmallVector<const SCEV *, 2> MatchedOperands;
for (const SCEV *Operand : MME->operands()) {
const SCEV *Matched = MatchNotExpr(Operand);
if (!Matched)
return (const SCEV *)nullptr;
MatchedOperands.push_back(Matched);
}
return getMinMaxExpr(SCEVMinMaxExpr::negate(MME->getSCEVType()),
MatchedOperands);
};
if (const SCEV *Replaced = MatchMinMaxNegation(MME))
return Replaced;
}
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMinusSCEV(getMinusOne(Ty), V);
}
/// Compute an expression equivalent to S - getPointerBase(S).
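/// For example, for a pointer expression (%ptr + 4 + %i) whose pointer base
/// is %ptr, the result is (4 + %i).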
static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
assert(P->getType()->isPointerTy());
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
// The base of an AddRec is the first operand.
SmallVector<const SCEV *> Ops{AddRec->operands()};
Ops[0] = removePointerBase(SE, Ops[0]);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the AddRec is a SCEVUnknown).
return SE->getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
// The base of an Add is the pointer operand.
SmallVector<const SCEV *> Ops{Add->operands()};
const SCEV **PtrOp = nullptr;
for (const SCEV *&AddOp : Ops) {
if (AddOp->getType()->isPointerTy()) {
// If we find an Add with multiple pointer operands, treat it as a
// pointer base to be consistent with getPointerBase. Eventually
// we should be able to assert this is impossible.
if (PtrOp)
return SE->getZero(P->getType());
PtrOp = &AddOp;
}
}
*PtrOp = removePointerBase(SE, *PtrOp);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the Add is a SCEVUnknown).
return SE->getAddExpr(Ops);
}
// Any other expression must be a pointer base.
return SE->getZero(P->getType());
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags,
unsigned Depth) {
// Fast path: X - X --> 0.
if (LHS == RHS)
return getZero(LHS->getType());
// If we subtract two pointers with different pointer bases, bail.
// Eventually, we're going to add an assertion to getMulExpr that we
// can't multiply by a pointer.
if (RHS->getType()->isPointerTy()) {
if (!LHS->getType()->isPointerTy() ||
getPointerBase(LHS) != getPointerBase(RHS))
return getCouldNotCompute();
LHS = removePointerBase(this, LHS);
RHS = removePointerBase(this, RHS);
}
// We represent LHS - RHS as LHS + (-1)*RHS. This transformation
// makes it so that we cannot make much use of NUW.
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
!getSignedRangeMin(RHS).isMinSignedValue();
if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
// a NSW subtraction because e.g. (-1)*M signed-wraps even though
// -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
// (-1)*RHS, we need to prove that RHS != M.
//
// If LHS is non-negative and we know that LHS - RHS does not
// signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
// either by proving that RHS > M or that LHS >= 0.
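// Concretely, for i8: M == -128, and (-1) * (-128) wraps back to -128,
// while -1 - (-128) == 127 does not wrap.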
if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
AddFlags = SCEV::FlagNSW;
}
}
// FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
// RHS is NSW and LHS >= 0.
//
// The difficulty here is that the NSW flag may have been proven
// relative to a loop that is to be found in a recurrence in LHS and
// not in RHS. Applying NSW to (-1)*M may then let the NSW have a
// larger scope than intended.
auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty, Depth);
return getZeroExtendExpr(V, Ty, Depth);
}
const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
return getTruncateExpr(V, Ty, Depth);
return getSignExtendExpr(V, Ty, Depth);
}
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getZeroExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getSignExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getAnyExtendExpr(V, Ty);
}
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
return getTruncateExpr(V, Ty);
}
const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
const SCEV *PromotedLHS = LHS;
const SCEV *PromotedRHS = RHS;
if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
else
PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
return getUMaxExpr(PromotedLHS, PromotedRHS);
}
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
return getUMinFromMismatchedTypes(Ops);
}
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "At least one operand must be!");
// Trivial case.
if (Ops.size() == 1)
return Ops[0];
// Find the max type first.
Type *MaxType = nullptr;
for (auto *S : Ops)
if (MaxType)
MaxType = getWiderType(MaxType, S->getType());
else
MaxType = S->getType();
assert(MaxType && "Failed to find maximum type!");
// Extend all ops to max type.
SmallVector<const SCEV *, 2> PromotedOps;
for (auto *S : Ops)
PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
// Generate umin.
return getUMinExpr(PromotedOps);
}
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
// A pointer operand may evaluate to a nonpointer expression, such as null.
if (!V->getType()->isPointerTy())
return V;
while (true) {
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
V = AddRec->getStart();
} else if (auto *Add = dyn_cast<SCEVAddExpr>(V)) {
const SCEV *PtrOp = nullptr;
for (const SCEV *AddOp : Add->operands()) {
if (AddOp->getType()->isPointerTy()) {
// Cannot find the base of an expression with multiple pointer ops.
if (PtrOp)
return V;
PtrOp = AddOp;
}
}
if (!PtrOp) // All operands were non-pointer.
return V;
V = PtrOp;
} else // Not something we can look further into.
return V;
}
}
/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
SmallVectorImpl<Instruction *> &Worklist) {
// Push the def-use children onto the Worklist stack.
for (User *U : I->users())
Worklist.push_back(cast<Instruction>(U));
}
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
PushDefUseChildren(PN, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
if (Old != SymName && !hasOperand(Old, SymName))
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, it's a PHI that's in the process of being computed
// by createNodeForPHI, or it's a single-value PHI. In the first case,
// additional loop trip count information isn't going to change anything.
// In the second case, createNodeForPHI will perform the necessary
// updates on its own when it gets to that point. In the third, we do
// want to forget the SCEVUnknown.
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
}
}
PushDefUseChildren(I, Worklist);
}
}
namespace {
/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
/// use its start expression. If the sub-expression's loop is not L, use the
/// AddRec itself when IgnoreOtherLoops is true; otherwise the rewrite cannot
/// be done. If the SCEV contains a SCEVUnknown that is not invariant in L,
/// the rewrite cannot be done either.
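/// For example, rewriting {A,+,B}<L> for loop L yields A, while an AddRec
/// for a different loop is left untouched when IgnoreOtherLoops is true.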
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
bool IgnoreOtherLoops = true) {
SCEVInitRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
if (Rewriter.hasSeenLoopVariantSCEVUnknown())
return SE.getCouldNotCompute();
return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
? SE.getCouldNotCompute()
: Result;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
SeenLoopVariantSCEVUnknown = true;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
// Only re-write AddRecExprs for this loop.
if (Expr->getLoop() == L)
return Expr->getStart();
SeenOtherLoops = true;
return Expr;
}
bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
bool hasSeenOtherLoops() { return SeenOtherLoops; }
private:
explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool SeenLoopVariantSCEVUnknown = false;
bool SeenOtherLoops = false;
};
/// Takes SCEV S and Loop L. For each AddRec sub-expression whose loop is L,
/// use its post-increment expression; otherwise use the AddRec itself.
/// If the SCEV contains a SCEVUnknown that is not invariant in L, the
/// rewrite cannot be done.
class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
SCEVPostIncRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
return Rewriter.hasSeenLoopVariantSCEVUnknown()
? SE.getCouldNotCompute()
: Result;
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (!SE.isLoopInvariant(Expr, L))
SeenLoopVariantSCEVUnknown = true;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
// Only re-write AddRecExprs for this loop.
if (Expr->getLoop() == L)
return Expr->getPostIncExpr(SE);
SeenOtherLoops = true;
return Expr;
}
bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }
bool hasSeenOtherLoops() { return SeenOtherLoops; }
private:
explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool SeenLoopVariantSCEVUnknown = false;
bool SeenOtherLoops = false;
};
/// This class evaluates the compare condition by matching it against the
/// condition of loop latch. If there is a match we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
: public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
bool IsPosBECond = false;
Value *BECond = nullptr;
if (BasicBlock *Latch = L->getLoopLatch()) {
BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
if (BI && BI->isConditional()) {
assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
"Both outgoing branches should not target same header!");
BECond = BI->getCondition();
IsPosBECond = BI->getSuccessor(0) == L->getHeader();
} else {
return S;
}
}
SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
return Rewriter.visit(S);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
const SCEV *Result = Expr;
bool InvariantF = SE.isLoopInvariant(Expr, L);
if (!InvariantF) {
Instruction *I = cast<Instruction>(Expr->getValue());
switch (I->getOpcode()) {
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
Optional<const SCEV *> Res =
compareWithBackedgeCondition(SI->getCondition());
if (Res.hasValue()) {
bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
}
break;
}
default: {
Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
if (Res.hasValue())
Result = Res.getValue();
break;
}
}
}
return Result;
}
private:
explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
bool IsPosBECond, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
IsPositiveBECond(IsPosBECond) {}
Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);
const Loop *L;
/// Loop back condition.
Value *BackedgeCond = nullptr;
/// Set to true if loop back is on positive branch condition.
bool IsPositiveBECond;
};
Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {
// If value matches the backedge condition for loop latch,
// then return a constant evolution node based on loopback
// branch taken.
if (BackedgeCond == IC)
return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
: SE.getZero(Type::getInt1Ty(SE.getContext()));
return None;
}
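/// Rewrites affine AddRecs of loop L by subtracting one step:
/// {A,+,B}<L> becomes {A-B,+,B}<L>. createAddRecFromPHI uses this to
/// recognize a PHI as the value of another expression shifted back by
/// one iteration.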
class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
static const SCEV *rewrite(const SCEV *S, const Loop *L,
ScalarEvolution &SE) {
SCEVShiftRewriter Rewriter(L, SE);
const SCEV *Result = Rewriter.visit(S);
return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
// Only allow AddRecExprs for this loop.
if (!SE.isLoopInvariant(Expr, L))
Valid = false;
return Expr;
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
if (Expr->getLoop() == L && Expr->isAffine())
return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
Valid = false;
return Expr;
}
bool isValid() { return Valid; }
private:
explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
: SCEVRewriteVisitor(SE), L(L) {}
const Loop *L;
bool Valid = true;
};
} // end anonymous namespace
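// Try to prove NUW/NSW for an affine AddRec purely from the constant ranges
// of the recurrence and of its step, using
// ConstantRange::makeGuaranteedNoWrapRegion.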
SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
if (!AR->isAffine())
return SCEV::FlagAnyWrap;
using OBO = OverflowingBinaryOperator;
SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
if (!AR->hasNoSignedWrap()) {
ConstantRange AddRecRange = getSignedRange(AR);
ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));
auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Instruction::Add, IncRange, OBO::NoSignedWrap);
if (NSWRegion.contains(AddRecRange))
Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
}
if (!AR->hasNoUnsignedWrap()) {
ConstantRange AddRecRange = getUnsignedRange(AR);
ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));
auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
Instruction::Add, IncRange, OBO::NoUnsignedWrap);
if (NUWRegion.contains(AddRecRange))
Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
}
return Result;
}
SCEV::NoWrapFlags
ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
if (AR->hasNoSignedWrap())
return Result;
if (!AR->isAffine())
return Result;
const SCEV *Step = AR->getStepRecurrence(*this);
const Loop *L = AR->getLoop();
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
// addrec is safe. Also, if the entry is guarded by a comparison with the
// start value and the backedge is guarded by a comparison with the post-inc
// value, the addrec is safe.
ICmpInst::Predicate Pred;
const SCEV *OverflowLimit =
getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
isKnownOnEveryIteration(Pred, AR, OverflowLimit))) {
Result = setFlags(Result, SCEV::FlagNSW);
}
return Result;
}
SCEV::NoWrapFlags
ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = AR->getNoWrapFlags();
if (AR->hasNoUnsignedWrap())
return Result;
if (!AR->isAffine())
return Result;
const SCEV *Step = AR->getStepRecurrence(*this);
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
// simply not analyzable, and it covers the case where this code is
// being called from within backedge-taken count analysis, such that
// attempting to ask for the backedge-taken count would likely result
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
// Normally, in the cases we can prove no-overflow via a
// backedge guarding condition, we can also compute a backedge
// taken count for the loop. The exceptions are assumptions and
// guards present in the loop -- SCEV is not great at exploiting
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
// addrec is safe. Also, if the entry is guarded by a comparison with the
// start value and the backedge is guarded by a comparison with the post-inc
// value, the addrec is safe.
if (isKnownPositive(Step)) {
const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
getUnsignedRangeMax(Step));
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) {
Result = setFlags(Result, SCEV::FlagNUW);
}
}
return Result;
}
namespace {
/// Represents an abstract binary operation. This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
unsigned Opcode;
Value *LHS;
Value *RHS;
bool IsNSW = false;
bool IsNUW = false;
/// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
/// constant expression.
Operator *Op = nullptr;
explicit BinaryOp(Operator *Op)
: Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
Op(Op) {
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
IsNSW = OBO->hasNoSignedWrap();
IsNUW = OBO->hasNoUnsignedWrap();
}
}
explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
bool IsNUW = false)
: Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};
} // end anonymous namespace
/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
auto *Op = dyn_cast<Operator>(V);
if (!Op)
return None;
// Implementation detail: all the cleverness here should happen without
// creating new SCEV expressions -- our caller knows tricks to avoid creating
// SCEV expressions when possible, and we should not break that.
switch (Op->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::URem:
case Instruction::And:
case Instruction::Or:
case Instruction::AShr:
case Instruction::Shl:
return BinaryOp(Op);
case Instruction::Xor:
if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
// If the RHS of the xor is a signmask, then this is just an add.
// Instcombine turns add of signmask into xor as a strength reduction step.
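// For example, for i8 values (xor %x, 0x80) computes the same result as
// (add %x, 0x80): only the sign bit is affected and its carry out is
// discarded.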
if (RHSC->getValue().isSignMask())
return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
return BinaryOp(Op);
case Instruction::LShr:
// Turn a logical shift right by a constant into an unsigned divide.
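// For example, (lshr i32 %x, 3) is treated as (udiv i32 %x, 8).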
if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().ult(BitWidth)) {
Constant *X =
ConstantInt::get(SA->getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
}
}
return BinaryOp(Op);
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(Op);
if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
break;
auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
if (!WO)
break;
Instruction::BinaryOps BinOp = WO->getBinaryOp();
bool Signed = WO->isSigned();
// TODO: Should add nuw/nsw flags for mul as well.
if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());
// Now that we know that all uses of the arithmetic-result component of
// CI are guarded by the overflow check, we can go ahead and pretend
// that the arithmetic is non-overflowing.
return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
/* IsNSW = */ Signed, /* IsNUW = */ !Signed);
}
default:
break;
}
// Recognise intrinsic loop.decrement.reg, and as this has exactly the same
// semantics as a Sub, return a binary sub expression.
if (auto *II = dyn_cast<IntrinsicInst>(V))
if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));
return None;
}
/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
bool &Signed, ScalarEvolution &SE) {
// The case where Op == SymbolicPHI (that is, with no type conversions on
// the way) is handled by the regular add recurrence creating logic and
// would have already been triggered in createAddRecForPHI. Reaching it here
// means that createAddRecFromPHI had failed for this PHI before (e.g.,
// because one of the other operands of the SCEVAddExpr updating this PHI is
// not invariant).
//
// Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in
// this case predicates that allow us to prove that Op == SymbolicPHI will
// be added.
if (Op == SymbolicPHI)
return nullptr;
unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType());
unsigned NewBits = SE.getTypeSizeInBits(Op->getType());
if (SourceBits != NewBits)
return nullptr;
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op);
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op);
if (!SExt && !ZExt)
return nullptr;
const SCEVTruncateExpr *Trunc =
SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand())
: dyn_cast<SCEVTruncateExpr>(ZExt->getOperand());
if (!Trunc)
return nullptr;
const SCEV *X = Trunc->getOperand();
if (X != SymbolicPHI)
return nullptr;
Signed = SExt != nullptr;
return Trunc->getType();
}
static const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) {
if (!PN->getType()->isIntegerTy())
return nullptr;
const Loop *L = LI.getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent())
return nullptr;
return L;
}
// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
// computation that updates the phi follows the following pattern:
// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
// which corresponds to a phi->trunc->sext/zext->add->phi update chain.
// If so, try to see if it can be rewritten as an AddRecExpr under some
// Predicates. If successful, return them as a pair. Also cache the results
// of the analysis.
//
// Example usage scenario:
// Say the Rewriter is called for the following SCEV:
// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
// where:
// %X = phi i64 (%Start, %BEValue)
// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X),
// and call this function with %SymbolicPHI = %X.
//
// The analysis will find that the value coming around the backedge has
// the following SCEV:
// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step)
// Upon concluding that this matches the desired pattern, the function
// will return the pair {NewAddRec, SmallPredsVec} where:
// NewAddRec = {%Start,+,%Step}
// SmallPredsVec = {P1, P2, P3} as follows:
// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw>
// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64)
// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64)
// The returned pair means that SymbolicPHI can be rewritten into NewAddRec
// under the predicates {P1,P2,P3}.
// This predicated rewrite will be cached in PredicatedSCEVRewrites:
// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3}}
//
// TODO's:
//
// 1) Extend the Induction descriptor to also support inductions that involve
// casts: When needed (namely, when we are called in the context of the
// vectorizer induction analysis), a Set of cast instructions will be
// populated by this method, and provided back to isInductionPHI. This is
// needed to allow the vectorizer to properly record them to be ignored by
// the cost model and to avoid vectorizing them (otherwise these casts,
// which are redundant under the runtime overflow checks, will be
// vectorized, which can be costly).
//
// 2) Support additional induction/PHISCEV patterns: We also want to support
// inductions where the sext-trunc / zext-trunc operations (partly) occur
// after the induction update operation (the induction increment):
//
// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix)
// which corresponds to a phi->add->trunc->sext/zext->phi update chain.
//
// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix)
// which corresponds to a phi->trunc->add->sext/zext->phi update chain.
//
// 3) Outline common code with createAddRecFromPHI to avoid duplication.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
SmallVector<const SCEVPredicate *, 3> Predicates;
// *** Part1: Analyze if we have a phi-with-cast pattern for which we can
// return an AddRec expression under some predicate.
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
assert(L && "Expecting an integer loop header phi");
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
StartValueV = nullptr;
break;
}
}
if (!BEValueV || !StartValueV)
return None;
const SCEV *BEValue = getSCEV(BEValueV);
// If the value coming around the backedge is an add with the symbolic
// value we just inserted, possibly with casts that we can ignore under
// an appropriate runtime guard, then we found a simple induction variable!
const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
if (!Add)
return None;
// If there is a single occurrence of the symbolic value, possibly
// casted, replace it with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
Type *TruncTy = nullptr;
bool Signed;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if ((TruncTy =
isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
if (FoundIndex == e) {
FoundIndex = i;
break;
}
if (FoundIndex == Add->getNumOperands())
return None;
// Create an add with everything but the specified operand.
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
const SCEV *Accum = getAddExpr(Ops);
// The runtime checks will not be valid if the step amount is
// varying inside the loop.
if (!isLoopInvariant(Accum, L))
return None;
// *** Part2: Create the predicates
// Analysis was successful: we have a phi-with-cast pattern for which we
// can return an AddRec expression under the following predicates:
//
// P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
// fits within the truncated type (does not overflow) for i = 0 to n-1.
// P2: An Equal predicate that guarantees that
// Start = (Ext ix (Trunc iy (Start) to ix) to iy)
// P3: An Equal predicate that guarantees that
// Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
//
// As we next prove, the above predicates guarantee that:
// Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
//
//
// More formally, we want to prove that:
// Expr(i+1) = Start + (i+1) * Accum
// = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// Given that:
// 1) Expr(0) = Start
// 2) Expr(1) = Start + Accum
// = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
// 3) Induction hypothesis (step i):
// Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
//
// Proof:
// Expr(i+1) =
// = Start + (i+1)*Accum
// = (Start + i*Accum) + Accum
// = Expr(i) + Accum
// = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
// :: from step i
//
// = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
//
// = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
// + (Ext ix (Trunc iy (Accum) to ix) to iy)
// + Accum :: from P3
//
// = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
// + Accum :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
//
// = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
// = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
//
// By induction, the same applies to all iterations 1<=i<n:
//
// Create a truncated addrec for which we will add a no overflow check (P1).
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV =
getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);
// PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
// ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
// will be constant.
//
// If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
// add P1.
if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
Signed ? SCEVWrapPredicate::IncrementNSSW
: SCEVWrapPredicate::IncrementNUSW;
const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
Predicates.push_back(AddRecPred);
}
// Create the Equal Predicates P2,P3:
// It is possible that the predicates P2 and/or P3 are computable at
// compile time due to StartVal and/or Accum being constants.
// If either one is, then we can check that now and escape if either P2
// or P3 is false.
// Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
// for each of StartVal and Accum
auto getExtendedExpr = [&](const SCEV *Expr,
bool CreateSignExtend) -> const SCEV * {
assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
const SCEV *ExtendedExpr =
CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
: getZeroExtendExpr(TruncatedExpr, Expr->getType());
return ExtendedExpr;
};
// Given:
// ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy)
// = getExtendedExpr(Expr)
// Determine whether the predicate P: Expr == ExtendedExpr
// is known to be false at compile time
auto PredIsKnownFalse = [&](const SCEV *Expr,
const SCEV *ExtendedExpr) -> bool {
return Expr != ExtendedExpr &&
isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
};
const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
if (PredIsKnownFalse(StartVal, StartExtended)) {
LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";);
return None;
}
// The Step is always Signed (because the overflow checks are either
// NSSW or NUSW)
const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
if (PredIsKnownFalse(Accum, AccumExtended)) {
LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";);
return None;
}
auto AppendPredicate = [&](const SCEV *Expr,
const SCEV *ExtendedExpr) -> void {
if (Expr != ExtendedExpr &&
!isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred);
Predicates.push_back(Pred);
}
};
AppendPredicate(StartVal, StartExtended);
AppendPredicate(Accum, AccumExtended);
// *** Part3: Predicates are ready. Now go ahead and create the new addrec in
// which the casts had been folded away. The caller can rewrite SymbolicPHI
// into NewAR if it will also add the runtime overflow checks specified in
// Predicates.
auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
std::make_pair(NewAR, Predicates);
// Remember the result of the analysis for this SCEV at this location.
PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
return PredRewrite;
}
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
auto *PN = cast<PHINode>(SymbolicPHI->getValue());
const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
if (!L)
return None;
// Check to see if we already analyzed this PHI.
auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
if (I != PredicatedSCEVRewrites.end()) {
std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
I->second;
// Analysis was done before and failed to create an AddRec:
if (Rewrite.first == SymbolicPHI)
return None;
// Analysis was done before and succeeded in creating an AddRec under
// a predicate:
assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
assert(!(Rewrite.second).empty() && "Expected to find Predicates");
return Rewrite;
}
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);
// Record in the cache that the analysis failed
if (!Rewrite) {
SmallVector<const SCEVPredicate *, 3> Predicates;
PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
return None;
}
return Rewrite;
}
// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy %step to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
bool PredicatedScalarEvolution::areAddRecsEqualWithPreds(
const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const {
if (AR1 == AR2)
return true;
auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool {
if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) &&
!Preds.implies(SE.getEqualPredicate(Expr2, Expr1)))
return false;
return true;
};
if (!areExprsEqual(AR1->getStart(), AR2->getStart()) ||
!areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE)))
return false;
return true;
}
/// A helper function for createAddRecFromPHI to handle simple cases.
///
/// This function tries to find an AddRec expression for the simplest (yet most
/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)).
/// If it fails, createAddRecFromPHI will use a more general, but slow,
/// technique for finding the AddRec expression.
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
Value *BEValueV,
Value *StartValueV) {
const Loop *L = LI.getLoopFor(PN->getParent());
assert(L && L->getHeader() == PN->getParent());
assert(BEValueV && StartValueV);
auto BO = MatchBinaryOp(BEValueV, DT);
if (!BO)
return nullptr;
if (BO->Opcode != Instruction::Add)
return nullptr;
const SCEV *Accum = nullptr;
if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
Accum = getSCEV(BO->RHS);
else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
Accum = getSCEV(BO->LHS);
if (!Accum)
return nullptr;
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->IsNUW)
Flags = setFlags(Flags, SCEV::FlagNUW);
if (BO->IsNSW)
Flags = setFlags(Flags, SCEV::FlagNSW);
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
const Loop *L = LI.getLoopFor(PN->getParent());
if (!L || L->getHeader() != PN->getParent())
return nullptr;
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
StartValueV = nullptr;
break;
}
}
if (!BEValueV || !StartValueV)
return nullptr;
assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
"PHI node already processed?");
// First, try to find an AddRec expression without creating a fictitious symbolic
// value for PN.
if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
return S;
// Handle PHI node value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
const SCEV *BEValue = getSCEV(BEValueV);
// NOTE: If BEValue is loop invariant, we know that the PHI node just
// has a special value for the first iteration of the loop.
// If the value coming around the backedge is an add with the symbolic
// value we just inserted, then we found a simple induction variable!
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
// If there is a single occurrence of the symbolic value, replace it
// with a recurrence.
unsigned FoundIndex = Add->getNumOperands();
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (Add->getOperand(i) == SymbolicName)
if (FoundIndex == e) {
FoundIndex = i;
break;
}
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
SmallVector<const SCEV *, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
L, *this));
const SCEV *Accum = getAddExpr(Ops);
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
if (isLoopInvariant(Accum, L) ||
(isa<SCEVAddRecExpr>(Accum) &&
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (auto BO = MatchBinaryOp(BEValueV, DT)) {
if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
if (BO->IsNUW)
Flags = setFlags(Flags, SCEV::FlagNUW);
if (BO->IsNSW)
Flags = setFlags(Flags, SCEV::FlagNSW);
}
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
// We cannot transfer nuw and nsw flags from subtraction
// operations -- sub nuw X, Y is not the same as add nuw X, -Y
// for instance.
}
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
forgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
}
} else {
// Otherwise, this could be a loop like this:
// i = 0; for (j = 1; ..; ++j) { .... i = j; }
// In this case, j = {1,+,1} and BEValue is j.
// Because the other in-value of i (0) fits the evolution of BEValue,
// i really is an addrec evolution.
//
// We can generalize this saying that i is the shifted value of BEValue
// by one iteration:
// PHI(f(0), f({1,+,1})) --> f({0,+,1})
const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
if (Shifted != getCouldNotCompute() &&
Start != getCouldNotCompute()) {
const SCEV *StartVal = getSCEV(StartValueV);
if (Start == StartVal) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
forgetSymbolicName(PN, SymbolicName);
ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
return Shifted;
}
}
}
// Remove the temporary PHI node SCEV that has been inserted while intending
// to create an AddRecExpr for this PHI node. We cannot keep this temporary,
// as it would prevent later (possibly simpler) SCEV expressions from being
// added to the ValueExprMap.
eraseValueFromMap(PN);
return nullptr;
}
// Checks if the SCEV S is available at BB. S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
BasicBlock *BB) {
struct CheckAvailable {
bool TraversalDone = false;
bool Available = true;
const Loop *L = nullptr; // The loop BB is in (can be nullptr)
BasicBlock *BB = nullptr;
DominatorTree &DT;
CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
: L(L), BB(BB), DT(DT) {}
bool setUnavailable() {
TraversalDone = true;
Available = false;
return false;
}
bool follow(const SCEV *S) {
switch (S->getSCEVType()) {
case scConstant:
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
// These expressions are available if their operand(s) is/are.
return true;
case scAddRecExpr: {
// We allow add recurrences that are on the loop that BB is in, or some
// outer loop. This guarantees availability because the value of the
// add recurrence at BB is simply the "current" value of the induction
// variable. We can relax this in the future; for instance an add
// recurrence on a sibling dominating loop is also available at BB.
const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
if (L && (ARLoop == L || ARLoop->contains(L)))
return true;
return setUnavailable();
}
case scUnknown: {
// For SCEVUnknown, we check for simple dominance.
const auto *SU = cast<SCEVUnknown>(S);
Value *V = SU->getValue();
if (isa<Argument>(V))
return false;
if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
return false;
return setUnavailable();
}
case scUDivExpr:
case scCouldNotCompute:
// We do not try to be smart about these at all.
return setUnavailable();
}
llvm_unreachable("Unknown SCEV kind!");
}
bool isDone() { return TraversalDone; }
};
CheckAvailable CA(L, BB, DT);
SCEVTraversal<CheckAvailable> ST(CA);
ST.visitAll(S);
return CA.Available;
}
// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
Value *&C, Value *&LHS, Value *&RHS) {
C = BI->getCondition();
BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));
if (!LeftEdge.isSingleEdge())
return false;
assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");
Use &LeftUse = Merge->getOperandUse(0);
Use &RightUse = Merge->getOperandUse(1);
if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
LHS = LeftUse;
RHS = RightUse;
return true;
}
if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
LHS = RightUse;
RHS = LeftUse;
return true;
}
return false;
}
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
auto IsReachable =
[&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
const Loop *L = LI.getLoopFor(PN->getParent());
// We don't want to break LCSSA, even in a SCEV expression tree.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
return nullptr;
// Try to match
//
// br %cond, label %left, label %right
// left:
// br label %merge
// right:
// br label %merge
// merge:
// V = phi [ %x, %left ], [ %y, %right ]
//
// as "select %cond, %x, %y"
BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
assert(IDom && "At least the entry block should dominate PN");
auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;
if (BI && BI->isConditional() &&
BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
}
return nullptr;
}
const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (const SCEV *S = createAddRecFromPHI(PN))
return S;
if (const SCEV *S = createNodeFromSelectLikePHI(PN))
return S;
// If the PHI has a single incoming value, follow that value, unless the
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
if (LI.replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
Value *Cond,
Value *TrueVal,
Value *FalseVal) {
// Handle "constant" branch or select. This can occur for instance when a
// loop pass transforms an inner loop and moves on to process the outer loop.
if (auto *CI = dyn_cast<ConstantInt>(Cond))
return getSCEV(CI->isOne() ? TrueVal : FalseVal);
// Try to match some simple smax or umax patterns.
auto *ICI = dyn_cast<ICmpInst>(Cond);
if (!ICI)
return getUnknown(I);
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
switch (ICI->getPredicate()) {
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a > b ? a+x : b+x -> max(a, b)+x
// a > b ? b+x : a+x -> min(a, b)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
bool Signed = ICI->isSigned();
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LS = getSCEV(LHS);
const SCEV *RS = getSCEV(RHS);
if (LA->getType()->isPointerTy()) {
// FIXME: Handle cases where LS/RS are pointers not equal to LA/RA.
// Need to make sure we can't produce weird expressions involving
// negated pointers.
if (LA == LS && RA == RS)
return Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS);
if (LA == RS && RA == LS)
return Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS);
}
auto CoerceOperand = [&](const SCEV *Op) -> const SCEV * {
if (Op->getType()->isPointerTy()) {
Op = getLosslessPtrToIntExpr(Op);
if (isa<SCEVCouldNotCompute>(Op))
return Op;
}
if (Signed)
Op = getNoopOrSignExtend(Op, I->getType());
else
Op = getNoopOrZeroExtend(Op, I->getType());
return Op;
};
LS = CoerceOperand(LS);
RS = CoerceOperand(RS);
if (isa<SCEVCouldNotCompute>(LS) || isa<SCEVCouldNotCompute>(RS))
break;
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, RS);
if (LDiff == RDiff)
return getAddExpr(Signed ? getSMaxExpr(LS, RS) : getUMaxExpr(LS, RS),
LDiff);
LDiff = getMinusSCEV(LA, RS);
RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(Signed ? getSMinExpr(LS, RS) : getUMinExpr(LS, RS),
LDiff);
}
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getOne(I->getType());
const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, One);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
const SCEV *One = getOne(I->getType());
const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
const SCEV *LA = getSCEV(TrueVal);
const SCEV *RA = getSCEV(FalseVal);
const SCEV *LDiff = getMinusSCEV(LA, One);
const SCEV *RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
default:
break;
}
return getUnknown(I);
}
/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Don't attempt to analyze GEPs over unsized objects.
if (!GEP->getSourceElementType()->isSized())
return getUnknown(GEP);
SmallVector<const SCEV *, 4> IndexExprs;
for (Value *Index : GEP->indices())
IndexExprs.push_back(getSCEV(Index));
return getGEPExpr(GEP, IndexExprs);
}
uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getAPInt().countTrailingZeros();
if (const SCEVPtrToIntExpr *I = dyn_cast<SCEVPtrToIntExpr>(S))
return GetMinTrailingZeros(I->getOperand());
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
return std::min(GetMinTrailingZeros(T->getOperand()),
(uint32_t)getTypeSizeInBits(T->getType()));
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType())
? getTypeSizeInBits(E->getType())
: OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
return OpRes == getTypeSizeInBits(E->getOperand()->getType())
? getTypeSizeInBits(E->getType())
: OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// The result is the min of all the operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
// The result is the sum of all operands' results.
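// For example (sketch): if %a has at least one trailing zero bit, a multiply
// by 4 contributes two more, so 4 * %a has at least three trailing zeros.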
uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
SumOpRes =
std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
return SumOpRes;
}
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
// The result is the min of all operands' results.
uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
return Known.countMinTrailingZeros();
}
// SCEVUDivExpr
return 0;
}
uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
auto I = MinTrailingZerosCache.find(S);
if (I != MinTrailingZerosCache.end())
return I->second;
uint32_t Result = GetMinTrailingZerosImpl(S);
auto InsertPair = MinTrailingZerosCache.insert({S, Result});
assert(InsertPair.second && "Should insert a new key");
return InsertPair.first->second;
}
/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (MDNode *MD = I->getMetadata(LLVMContext::MD_range))
return getConstantRangeFromMetadata(*MD);
return None;
}
void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
SCEV::NoWrapFlags Flags) {
if (AddRec->getNoWrapFlags(Flags) != Flags) {
AddRec->setNoWrapFlags(Flags);
UnsignedRanges.erase(AddRec);
SignedRanges.erase(AddRec);
}
}
ConstantRange ScalarEvolution::
getRangeForUnknownRecurrence(const SCEVUnknown *U) {
const DataLayout &DL = getDataLayout();
unsigned BitWidth = getTypeSizeInBits(U->getType());
const ConstantRange FullSet(BitWidth, /*isFullSet=*/true);
// Match a simple recurrence of the form: <start, ShiftOp, Step>, and then
// use information about the trip count to improve our available range. Note
// that the trip count independent cases are already handled by known bits.
// WARNING: The definition of recurrence used here is subtly different than
// the one used by AddRec (and thus most of this file). Step is allowed to
// be arbitrarily loop varying here, where AddRec allows only loop invariant
// and other addrecs in the same loop (for non-affine addrecs). The code
// below intentionally handles the case where step is not loop invariant.
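// For example (sketch), a loop-header phi like this is a recurrence in the
// sense used here even though the shift amount varies per iteration:
//   loop:
//     %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ]
//     %iv.next = lshr i32 %iv, %amt   ; %amt need not be loop invariant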
auto *P = dyn_cast<PHINode>(U->getValue());
if (!P)
return FullSet;
// Make sure that no Phi input comes from an unreachable block. Otherwise,
// even the values that are not available in these blocks may come from them,
// and this leads to a false-positive recurrence test.
for (auto *Pred : predecessors(P->getParent()))
if (!DT.isReachableFromEntry(Pred))
return FullSet;
BinaryOperator *BO;
Value *Start, *Step;
if (!matchSimpleRecurrence(P, BO, Start, Step))
return FullSet;
// If we found a recurrence in reachable code, we must be in a loop. Note
// that BO might be in some subloop of L, and that's completely okay.
auto *L = LI.getLoopFor(P->getParent());
assert(L && L->getHeader() == P->getParent());
if (!L->contains(BO->getParent()))
// NOTE: This bailout should be an assert instead. However, asserting
// the condition here exposes a case where LoopFusion is querying SCEV
// with malformed loop information in the midst of the transform.
// There doesn't appear to be an obvious fix, so for the moment bailout
// until the caller issue can be fixed. PR49566 tracks the bug.
return FullSet;
// TODO: Extend to other opcodes such as mul, and div
switch (BO->getOpcode()) {
default:
return FullSet;
case Instruction::AShr:
case Instruction::LShr:
case Instruction::Shl:
break;
};
if (BO->getOperand(0) != P)
// TODO: Handle the power function forms some day.
return FullSet;
unsigned TC = getSmallConstantMaxTripCount(L);
if (!TC || TC >= BitWidth)
return FullSet;
auto KnownStart = computeKnownBits(Start, DL, 0, &AC, nullptr, &DT);
auto KnownStep = computeKnownBits(Step, DL, 0, &AC, nullptr, &DT);
assert(KnownStart.getBitWidth() == BitWidth &&
KnownStep.getBitWidth() == BitWidth);
// Compute total shift amount, being careful of overflow and bitwidths.
auto MaxShiftAmt = KnownStep.getMaxValue();
APInt TCAP(BitWidth, TC-1);
bool Overflow = false;
auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow);
if (Overflow)
return FullSet;
switch (BO->getOpcode()) {
default:
llvm_unreachable("filtered out above");
case Instruction::AShr: {
// For each ashr, three cases:
// shift = 0 => unchanged value
// saturation => 0 or -1
// other => a value closer to zero (of the same sign)
// Thus, the end value is closer to zero than the start.
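// For example, in i8: -100 ashr 2 == -25 and 100 ashr 2 == 25, both strictly
// closer to zero; a saturating shift ends at -1 or 0 respectively.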
auto KnownEnd = KnownBits::ashr(KnownStart,
KnownBits::makeConstant(TotalShift));
if (KnownStart.isNonNegative())
// Analogous to lshr (simply not yet canonicalized)
return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
KnownStart.getMaxValue() + 1);
if (KnownStart.isNegative())
// End >=u Start && End <=s Start
return ConstantRange::getNonEmpty(KnownStart.getMinValue(),
KnownEnd.getMaxValue() + 1);
break;
}
case Instruction::LShr: {
// For each lshr, three cases:
// shift = 0 => unchanged value
// saturation => 0
// other => a smaller positive number
// Thus, the low end of the unsigned range is the last value produced.
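// For example, in i8: 100 lshr 2 == 25; repeated lshr never increases the
// value and eventually saturates at 0.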
auto KnownEnd = KnownBits::lshr(KnownStart,
KnownBits::makeConstant(TotalShift));
return ConstantRange::getNonEmpty(KnownEnd.getMinValue(),
KnownStart.getMaxValue() + 1);
}
case Instruction::Shl: {
// Iff no bits are shifted out, value increases on every shift.
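// For example, in i8: 3 shl 2 == 12 (no bits lost, the value grew), but
// 0xC8 shl 1 == 0x90, which is smaller because the top bit was shifted out.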
auto KnownEnd = KnownBits::shl(KnownStart,
KnownBits::makeConstant(TotalShift));
if (TotalShift.ult(KnownStart.countMinLeadingZeros()))
return ConstantRange(KnownStart.getMinValue(),
KnownEnd.getMaxValue() + 1);
break;
}
};
return FullSet;
}
/// Determine the range for a particular SCEV. If SignHint is
/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
/// with a "cleaner" unsigned (resp. signed) representation.
const ConstantRange &
ScalarEvolution::getRangeRef(const SCEV *S,
ScalarEvolution::RangeSignHint SignHint) {
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
ConstantRange::PreferredRangeType RangeType =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
? ConstantRange::Unsigned : ConstantRange::Signed;
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
if (I != Cache.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return setRange(C, SignHint, ConstantRange(C->getAPInt()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
using OBO = OverflowingBinaryOperator;
// If the value has known zeros, the maximum value will have those known zeros
// as well.
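// For example, with BitWidth == 8 and TZ == 2, the unsigned case below yields
// [0, 253) (maximum 252 == 0b11111100) and the signed case yields [-128, 125).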
uint32_t TZ = GetMinTrailingZeros(S);
if (TZ != 0) {
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
ConservativeResult =
ConstantRange(APInt::getMinValue(BitWidth),
APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
else
ConservativeResult = ConstantRange(
APInt::getSignedMinValue(BitWidth),
APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
unsigned WrapType = OBO::AnyWrap;
if (Add->hasNoSignedWrap())
WrapType |= OBO::NoSignedWrap;
if (Add->hasNoUnsignedWrap())
WrapType |= OBO::NoUnsignedWrap;
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint),
WrapType, RangeType);
return setRange(Add, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
return setRange(Mul, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
return setRange(SMax, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
return setRange(UMax, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) {
ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint);
for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i)
X = X.smin(getRangeRef(SMin->getOperand(i), SignHint));
return setRange(SMin, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) {
ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint);
for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i)
X = X.umin(getRangeRef(UMin->getOperand(i), SignHint));
return setRange(UMin, SignHint,
ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
ConservativeResult.intersectWith(X.udiv(Y), RangeType));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
RangeType));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
ConservativeResult.intersectWith(X.signExtend(BitWidth),
RangeType));
}
if (const SCEVPtrToIntExpr *PtrToInt = dyn_cast<SCEVPtrToIntExpr>(S)) {
ConstantRange X = getRangeRef(PtrToInt->getOperand(), SignHint);
return setRange(PtrToInt, SignHint, X);
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
ConservativeResult.intersectWith(X.truncate(BitWidth),
RangeType));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no unsigned wrap, the value will never be less than its
// initial value.
if (AddRec->hasNoUnsignedWrap()) {
APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
if (!UnsignedMinValue.isNullValue())
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
}
// If there's no signed wrap, and all the operands except the initial value
// have the same sign or are zero, the value won't ever be:
// 1: smaller than the initial value if the operands are non-negative,
// 2: bigger than the initial value if the operands are non-positive.
// In both cases, the value cannot cross the signed min/max boundary.
if (AddRec->hasNoSignedWrap()) {
bool AllNonNeg = true;
bool AllNonPos = true;
for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) {
if (!isKnownNonNegative(AddRec->getOperand(i)))
AllNonNeg = false;
if (!isKnownNonPositive(AddRec->getOperand(i)))
AllNonPos = false;
}
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()),
APInt::getSignedMinValue(BitWidth)),
RangeType);
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange::getNonEmpty(
APInt::getSignedMinValue(BitWidth),
getSignedRangeMax(AddRec->getStart()) + 1),
RangeType);
}
// TODO: non-affine addrec
if (AddRec->isAffine()) {
const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
auto RangeFromAffine = getRangeForAffineAR(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromAffine, RangeType);
auto RangeFromFactoring = getRangeViaFactoring(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
}
// Now try symbolic BE count and more powerful methods.
if (UseExpensiveRangeSharpening) {
const SCEV *SymbolicMaxBECount =
getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
AddRec->hasNoSelfWrap()) {
auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
AddRec, SymbolicMaxBECount, BitWidth, SignHint);
ConservativeResult =
ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
}
}
}
return setRange(AddRec, SignHint, std::move(ConservativeResult));
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
if (MDRange.hasValue())
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
RangeType);
// Use facts about recurrences in the underlying IR. Note that add
// recurrences are AddRecExprs and thus don't hit this path. This
// primarily handles shift recurrences.
auto CR = getRangeForUnknownRecurrence(U);
ConservativeResult = ConservativeResult.intersectWith(CR);
// See if ValueTracking can give us a useful range.
const DataLayout &DL = getDataLayout();
KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (Known.getBitWidth() != BitWidth)
Known = Known.zextOrTrunc(BitWidth);
// ValueTracking may be able to compute a tighter result for the number of
// sign bits than for the value of those sign bits.
unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (U->getType()->isPointerTy()) {
// If the pointer size is larger than the index size type, this can cause
// NS to be larger than BitWidth. So compensate for this.
unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType());
int ptrIdxDiff = ptrSize - BitWidth;
if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff)
NS -= ptrIdxDiff;
}
if (NS > 1) {
// If we know any of the sign bits, we know all of the sign bits.
if (!Known.Zero.getHiBits(NS).isNullValue())
Known.Zero.setHighBits(NS);
if (!Known.One.getHiBits(NS).isNullValue())
Known.One.setHighBits(NS);
}
if (Known.getMinValue() != Known.getMaxValue() + 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1),
RangeType);
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
RangeType);
// The range of a Phi is a subset of the union of the ranges of its inputs.
if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
// Make sure that we do not run over cycled Phis.
if (PendingPhiRanges.insert(Phi).second) {
ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
for (auto &Op : Phi->operands()) {
auto OpRange = getRangeRef(getSCEV(Op), SignHint);
RangeFromOps = RangeFromOps.unionWith(OpRange);
// No point in continuing if we already have a full set.
if (RangeFromOps.isFullSet())
break;
}
ConservativeResult =
ConservativeResult.intersectWith(RangeFromOps, RangeType);
bool Erased = PendingPhiRanges.erase(Phi);
assert(Erased && "Failed to erase Phi properly?");
(void) Erased;
}
}
return setRange(U, SignHint, std::move(ConservativeResult));
}
return setRange(S, SignHint, std::move(ConservativeResult));
}
// Given a StartRange, Step and MaxBECount for an expression compute a range of
// values that the expression can take. Initially, the expression has a value
// from StartRange and then is changed by Step up to MaxBECount times. Signed
// argument defines if we treat Step as signed or unsigned.
static ConstantRange getRangeForAffineARHelper(APInt Step,
const ConstantRange &StartRange,
const APInt &MaxBECount,
unsigned BitWidth, bool Signed) {
// If either Step or MaxBECount is 0, then the expression won't change, and we
// just need to return the initial range.
if (Step == 0 || MaxBECount == 0)
return StartRange;
// If we don't know anything about the initial value (i.e. StartRange is
// FullRange), then we don't know anything about the final range either.
// Return FullRange.
if (StartRange.isFullSet())
return ConstantRange::getFull(BitWidth);
// If Step is signed and negative, then we use its absolute value, but we also
// note that we're moving in the opposite direction.
bool Descending = Signed && Step.isNegative();
if (Signed)
// This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
// abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
// These equations hold true due to the well-defined wrap-around behavior of
// APInt.
Step = Step.abs();
// Check if Offset is more than the full span of BitWidth. If it is, the
// expression is guaranteed to overflow.
if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
return ConstantRange::getFull(BitWidth);
// Offset is by how much the expression can change. Checks above guarantee no
// overflow here.
APInt Offset = Step * MaxBECount;
// Minimum value of the final range will match the minimal value of StartRange
// if the expression is increasing and will be decreased by Offset otherwise.
// Maximum value of the final range will match the maximal value of StartRange
// if the expression is decreasing and will be increased by Offset otherwise.
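// For example, StartRange == [10, 21), Step == 3 and MaxBECount == 5 give
// Offset == 15, so the ascending case below produces the range [10, 36).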
APInt StartLower = StartRange.getLower();
APInt StartUpper = StartRange.getUpper() - 1;
APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
: (StartUpper + std::move(Offset));
// It's possible that the new minimum/maximum value will fall into the initial
// range (due to wrap around). This means that the expression can take any
// value in this bitwidth, and we have to return full range.
if (StartRange.contains(MovedBoundary))
return ConstantRange::getFull(BitWidth);
APInt NewLower =
Descending ? std::move(MovedBoundary) : std::move(StartLower);
APInt NewUpper =
Descending ? std::move(StartUpper) : std::move(MovedBoundary);
NewUpper += 1;
// No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
}
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
unsigned BitWidth) {
assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
"Precondition!");
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
// First, consider step signed.
ConstantRange StartSRange = getSignedRange(Start);
ConstantRange StepSRange = getSignedRange(Step);
// If Step can be both positive and negative, we need to find ranges for the
// maximum absolute step values in both directions and union them.
ConstantRange SR =
getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
MaxBECountValue, BitWidth, /* Signed = */ true);
SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
StartSRange, MaxBECountValue,
BitWidth, /* Signed = */ true));
// Next, consider step unsigned.
ConstantRange UR = getRangeForAffineARHelper(
getUnsignedRangeMax(Step), getUnsignedRange(Start),
MaxBECountValue, BitWidth, /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
return SR.intersectWith(UR, ConstantRange::Smallest);
}
ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
ScalarEvolution::RangeSignHint SignHint) {
assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
assert(AddRec->hasNoSelfWrap() &&
"This only works for non-self-wrapping AddRecs!");
const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
const SCEV *Step = AddRec->getStepRecurrence(*this);
// Only deal with constant step to save compile time.
if (!isa<SCEVConstant>(Step))
return ConstantRange::getFull(BitWidth);
// Let's make sure that we can prove that we do not self-wrap during
// MaxBECount iterations. We need this because MaxBECount is a maximum
// iteration count estimate, and we might infer nw from some exit for which we
// do not know max exit count (or any other side reasoning).
// TODO: Turn into assert at some point.
if (getTypeSizeInBits(MaxBECount->getType()) >
getTypeSizeInBits(AddRec->getType()))
return ConstantRange::getFull(BitWidth);
MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
const SCEV *RangeWidth = getMinusOne(AddRec->getType());
const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount,
MaxItersWithoutWrap))
return ConstantRange::getFull(BitWidth);
ICmpInst::Predicate LEPred =
IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
ICmpInst::Predicate GEPred =
IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
// We know that there is no self-wrap. Let's take Start and End values and
// look at all intermediate values V1, V2, ..., Vn that IndVar takes during
// the iteration. They either lie inside the range [Min(Start, End),
// Max(Start, End)] or outside it:
//
// Case 1: RangeMin ... Start V1 ... VN End ... RangeMax;
// Case 2: RangeMin Vk ... V1 Start ... End Vn ... Vk + 1 RangeMax;
//
// No self wrap flag guarantees that the intermediate values cannot be BOTH
// outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
// knowledge, let's try to prove that we are dealing with Case 1. It is so if
// Start <= End and step is positive, or Start >= End and step is negative.
const SCEV *Start = AddRec->getStart();
ConstantRange StartRange = getRangeRef(Start, SignHint);
ConstantRange EndRange = getRangeRef(End, SignHint);
ConstantRange RangeBetween = StartRange.unionWith(EndRange);
// If they already cover full iteration space, we will know nothing useful
// even if we prove what we want to prove.
if (RangeBetween.isFullSet())
return RangeBetween;
// Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet()
: RangeBetween.isWrappedSet();
if (IsWrappedSet)
return ConstantRange::getFull(BitWidth);
if (isKnownPositive(Step) &&
isKnownPredicateViaConstantRanges(LEPred, Start, End))
return RangeBetween;
else if (isKnownNegative(Step) &&
isKnownPredicateViaConstantRanges(GEPred, Start, End))
return RangeBetween;
return ConstantRange::getFull(BitWidth);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
unsigned BitWidth) {
// RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
// == RangeOf({A,+,P}) union RangeOf({B,+,Q})
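// For example, RangeOf({C ? 0 : 10,+,C ? 1 : 2}) is computed as
// RangeOf({0,+,1}) union RangeOf({10,+,2}).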
struct SelectPattern {
Value *Condition = nullptr;
APInt TrueValue;
APInt FalseValue;
explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth,
const SCEV *S) {
Optional<unsigned> CastOp;
APInt Offset(BitWidth, 0);
assert(SE.getTypeSizeInBits(S->getType()) == BitWidth &&
"Should be!");
// Peel off a constant offset:
if (auto *SA = dyn_cast<SCEVAddExpr>(S)) {
// In the future we could consider being smarter here and handle
// {Start+Step,+,Step} too.
if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0)))
return;
Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt();
S = SA->getOperand(1);
}
// Peel off a cast operation
if (auto *SCast = dyn_cast<SCEVIntegralCastExpr>(S)) {
CastOp = SCast->getSCEVType();
S = SCast->getOperand();
}
using namespace llvm::PatternMatch;
auto *SU = dyn_cast<SCEVUnknown>(S);
const APInt *TrueVal, *FalseVal;
if (!SU ||
!match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal),
m_APInt(FalseVal)))) {
Condition = nullptr;
return;
}
TrueValue = *TrueVal;
FalseValue = *FalseVal;
// Re-apply the cast we peeled off earlier
if (CastOp.hasValue())
switch (*CastOp) {
default:
llvm_unreachable("Unknown SCEV cast type!");
case scTruncate:
TrueValue = TrueValue.trunc(BitWidth);
FalseValue = FalseValue.trunc(BitWidth);
break;
case scZeroExtend:
TrueValue = TrueValue.zext(BitWidth);
FalseValue = FalseValue.zext(BitWidth);
break;
case scSignExtend:
TrueValue = TrueValue.sext(BitWidth);
FalseValue = FalseValue.sext(BitWidth);
break;
}
// Re-apply the constant offset we peeled off earlier
TrueValue += Offset;
FalseValue += Offset;
}
bool isRecognized() { return Condition != nullptr; }
};
SelectPattern StartPattern(*this, BitWidth, Start);
if (!StartPattern.isRecognized())
return ConstantRange::getFull(BitWidth);
SelectPattern StepPattern(*this, BitWidth, Step);
if (!StepPattern.isRecognized())
return ConstantRange::getFull(BitWidth);
if (StartPattern.Condition != StepPattern.Condition) {
// We don't handle this case today, but we could, by considering four
// possibilities below instead of two. I'm not sure if there are cases where
// that will help over what getRange already does, though.
return ConstantRange::getFull(BitWidth);
}
// NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
// construct arbitrary general SCEV expressions here. This function is called
// from deep in the call stack, and calling getSCEV (on a sext instruction,
// say) can end up caching a suboptimal value.
// FIXME: without the explicit `this` receiver below, MSVC errors out with
// C2352 and C2512 (otherwise it isn't needed).
const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue);
const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue);
const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue);
const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
ConstantRange TrueRange =
this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
ConstantRange FalseRange =
this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);
return TrueRange.unionWith(FalseRange);
}
SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
const BinaryOperator *BinOp = cast<BinaryOperator>(V);
// Return early if there are no flags to propagate to the SCEV.
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BinOp->hasNoUnsignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
if (BinOp->hasNoSignedWrap())
Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
if (Flags == SCEV::FlagAnyWrap)
return SCEV::FlagAnyWrap;
return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
}
bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// Here we check that I is in the header of the innermost loop containing I,
// since we only deal with instructions in the loop header. The actual loop we
// need to check later will come from an add recurrence, but getting that
// requires computing the SCEV of the operands, which can be expensive. This
// check we can do cheaply to rule out some cases early.
Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
if (InnermostContainingLoop == nullptr ||
InnermostContainingLoop->getHeader() != I->getParent())
return false;
// Only proceed if we can prove that I does not yield poison.
if (!programUndefinedIfPoison(I))
return false;
// At this point we know that if I is executed, then it does not wrap
// according to at least one of NSW or NUW. If I is not executed, then we do
// not know if the calculation that I represents would wrap. Multiple
// instructions can map to the same SCEV. If we apply NSW or NUW from I to
// the SCEV, we must guarantee no wrapping for that SCEV also when it is
// derived from other instructions that map to the same SCEV. We cannot make
// that guarantee for cases where I is not executed. So we need to find the
// loop that I is considered in relation to and prove that I is executed for
// every iteration of that loop. That implies that the value that I
// calculates does not wrap anywhere in the loop, so then we can apply the
// flags to the SCEV.
//
// We check isLoopInvariant to disambiguate in case we are adding recurrences
// from different loops, so that we know which loop to prove that I is
// executed in.
for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
// I could be an extractvalue from a call to an overflow intrinsic.
// TODO: We can do better here in some cases.
if (!isSCEVable(I->getOperand(OpIndex)->getType()))
return false;
const SCEV *Op = getSCEV(I->getOperand(OpIndex));
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
bool AllOtherOpsLoopInvariant = true;
for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
++OtherOpIndex) {
if (OtherOpIndex != OpIndex) {
const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
AllOtherOpsLoopInvariant = false;
break;
}
}
}
if (AllOtherOpsLoopInvariant &&
isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
return true;
}
}
return false;
}
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
// If we know that \c I can never be poison period, then that's enough.
if (isSCEVExprNeverPoison(I))
return true;
// For an add recurrence specifically, we assume that infinite loops without
// side effects are undefined behavior, and then reason as follows:
//
// If the add recurrence is poison in any iteration, it is poison on all
// future iterations (since incrementing poison yields poison). If the result
// of the add recurrence is fed into the loop latch condition and the loop
// does not contain any throws or exiting blocks other than the latch, we now
// have the ability to "choose" whether the backedge is taken or not (by
// choosing a sufficiently evil value for the poison feeding into the branch)
// for every iteration including and after the one in which \p I first became
// poison. There are two possibilities (let's call the iteration in which \p
// I first became poison as K):
//
// 1. In the set of iterations including and after K, the loop body executes
// no side effects. In this case executing the backedge an infinite number
// of times will yield undefined behavior.
//
// 2. In the set of iterations including and after K, the loop body executes
// at least one side effect. In this case, that specific instance of side
// effect is control dependent on poison, which also yields undefined
// behavior.
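// As a sketch in IR terms: once %iv below is poison, the latch condition is
// poison too, so whether the backedge is taken is "chosen" by poison:
//   loop:
//     %iv = add nsw i32 %iv.prev, 1
//     %cmp = icmp slt i32 %iv, %n
//     br i1 %cmp, label %loop, label %exit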
auto *ExitingBB = L->getExitingBlock();
auto *LatchBB = L->getLoopLatch();
if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
return false;
SmallPtrSet<const Instruction *, 16> Pushed;
SmallVector<const Instruction *, 8> PoisonStack;
// We start by assuming \c I, the post-inc add recurrence, is poison. Only
// things that are known to be poison under that assumption go on the
// PoisonStack.
Pushed.insert(I);
PoisonStack.push_back(I);
bool LatchControlDependentOnPoison = false;
while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
const Instruction *Poison = PoisonStack.pop_back_val();
for (auto *PoisonUser : Poison->users()) {
if (propagatesPoison(cast<Operator>(PoisonUser))) {
if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
PoisonStack.push_back(cast<Instruction>(PoisonUser));
} else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
assert(BI->isConditional() && "Only possibility!");
if (BI->getParent() == LatchBB) {
LatchControlDependentOnPoison = true;
break;
}
}
}
}
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
ScalarEvolution::LoopProperties
ScalarEvolution::getLoopProperties(const Loop *L) {
using LoopProperties = ScalarEvolution::LoopProperties;
auto Itr = LoopPropertiesCache.find(L);
if (Itr == LoopPropertiesCache.end()) {
auto HasSideEffects = [](Instruction *I) {
if (auto *SI = dyn_cast<StoreInst>(I))
return !SI->isSimple();
return I->mayThrow() || I->mayWriteToMemory();
};
LoopProperties LP = {/* HasNoAbnormalExits */ true,
/*HasNoSideEffects*/ true};
for (auto *BB : L->getBlocks())
for (auto &I : *BB) {
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
LP.HasNoAbnormalExits = false;
if (HasSideEffects(&I))
LP.HasNoSideEffects = false;
if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
break; // We're already as pessimistic as we can get.
}
auto InsertPair = LoopPropertiesCache.insert({L, LP});
assert(InsertPair.second && "We just checked!");
Itr = InsertPair.first;
}
return Itr->second;
}
bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
return isMustProgress(L) && loopHasNoSideEffects(L);
}
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
return getUnknown(V);
if (Instruction *I = dyn_cast<Instruction>(V)) {
// Don't attempt to analyze instructions in blocks that aren't
// reachable. Such instructions don't matter, and they aren't required
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT.isReachableFromEntry(I->getParent()))
return getUnknown(UndefValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
else if (!isa<ConstantExpr>(V))
return getUnknown(V);
Operator *U = cast<Operator>(V);
if (auto BO = MatchBinaryOp(U, DT)) {
switch (BO->Opcode) {
case Instruction::Add: {
// The simple thing to do would be to just call getSCEV on both operands
// and call getAddExpr with the result. However if we're looking at a
// bunch of things all added together, this can be quite inefficient,
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
SmallVector<const SCEV *, 4> AddOps;
do {
if (BO->Op) {
if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
AddOps.push_back(OpSCEV);
break;
}
// If a NUW or NSW flag can be applied to the SCEV for this
// addition, then compute the SCEV for this addition by itself
// with a separate call to getAddExpr. We need to do that
// instead of pushing the operands of the addition onto AddOps,
// since the flags are only known to apply to this particular
// addition - they may not apply to other additions that can be
// formed with operands from AddOps.
const SCEV *RHS = getSCEV(BO->RHS);
SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
if (Flags != SCEV::FlagAnyWrap) {
const SCEV *LHS = getSCEV(BO->LHS);
if (BO->Opcode == Instruction::Sub)
AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
else
AddOps.push_back(getAddExpr(LHS, RHS, Flags));
break;
}
}
if (BO->Opcode == Instruction::Sub)
AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS)));
else
AddOps.push_back(getSCEV(BO->RHS));
auto NewBO = MatchBinaryOp(BO->LHS, DT);
if (!NewBO || (NewBO->Opcode != Instruction::Add &&
NewBO->Opcode != Instruction::Sub)) {
AddOps.push_back(getSCEV(BO->LHS));
break;
}
BO = NewBO;
} while (true);
return getAddExpr(AddOps);
}
case Instruction::Mul: {
SmallVector<const SCEV *, 4> MulOps;
do {
if (BO->Op) {
if (auto *OpSCEV = getExistingSCEV(BO->Op)) {
MulOps.push_back(OpSCEV);
break;
}
SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op);
if (Flags != SCEV::FlagAnyWrap) {
MulOps.push_back(
getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags));
break;
}
}
MulOps.push_back(getSCEV(BO->RHS));
auto NewBO = MatchBinaryOp(BO->LHS, DT);
if (!NewBO || NewBO->Opcode != Instruction::Mul) {
MulOps.push_back(getSCEV(BO->LHS));
break;
}
BO = NewBO;
} while (true);
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
case Instruction::URem:
return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS));
case Instruction::Sub: {
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
if (BO->Op)
Flags = getNoWrapFlagsFromUB(BO->Op);
return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
}
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
if (CI->isZero())
return getSCEV(BO->RHS);
if (CI->isMinusOne())
return getSCEV(BO->LHS);
const APInt &A = CI->getValue();
// Instcombine's ShrinkDemandedConstant may strip bits out of
// constants, obscuring what would otherwise be a low-bits mask.
// Use computeKnownBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
KnownBits Known(BitWidth);
computeKnownBits(BO->LHS, Known, getDataLayout(),
0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) {
const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
const SCEV *LHS = getSCEV(BO->LHS);
const SCEV *ShiftedLHS = nullptr;
if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
// For an expression like (x * 8) & 8, simplify the multiply.
unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
unsigned GCD = std::min(MulZeros, TZ);
APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
SmallVector<const SCEV*, 4> MulOps;
MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end());
auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
}
}
if (!ShiftedLHS)
ShiftedLHS = getUDivExpr(LHS, MulCount);
return getMulExpr(
getZeroExtendExpr(
getTruncateExpr(ShiftedLHS,
IntegerType::get(getContext(), BitWidth - LZ - TZ)),
BO->LHS->getType()),
MulCount);
}
}
break;
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
// optimizations will transparently handle this case.
//
// In order for this transformation to be safe, the LHS must be of the
// form X*(2^n) and the Or constant must be less than 2^n.
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
const SCEV *LHS = getSCEV(BO->LHS);
const APInt &CIVal = CI->getValue();
if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
// Build a plain add SCEV.
return getAddExpr(LHS, getSCEV(CI),
(SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
}
}
break;
case Instruction::Xor:
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
// If the RHS of xor is -1, then this is a not operation.
if (CI->isMinusOne())
return getNotSCEV(getSCEV(BO->LHS));
// Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
// This is a variant of the check for xor with -1, and it handles
// the case where instcombine has trimmed non-demanded bits out
// of an xor with -1.
if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS))
if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1)))
if (LBO->getOpcode() == Instruction::And &&
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) {
Type *UTy = BO->LHS->getType();
const SCEV *Z0 = Z->getOperand();
Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
// If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
if (CI->getValue().isMask(Z0TySize))
return getZeroExtendExpr(getNotSCEV(Z0), UTy);
// If C is a single bit, it may be in the sign-bit position
// before the zero-extend. In this case, represent the xor
// using an add, which is equivalent, and re-apply the zext.
APInt Trunc = CI->getValue().trunc(Z0TySize);
if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
Trunc.isSignMask())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
}
}
break;
case Instruction::Shl:
// Turn shift left of a constant amount into a multiply.
if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) {
uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth();
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (SA->getValue().uge(BitWidth))
break;
// We can safely preserve the nuw flag in all cases. It's also safe to
// turn a nuw nsw shl into a nuw nsw mul. However, nsw in isolation
// requires special handling. It can be preserved as long as we're not
// left shifting by bitwidth - 1.
auto Flags = SCEV::FlagAnyWrap;
if (BO->Op) {
auto MulFlags = getNoWrapFlagsFromUB(BO->Op);
if ((MulFlags & SCEV::FlagNSW) &&
((MulFlags & SCEV::FlagNUW) || SA->getValue().ult(BitWidth - 1)))
Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNSW);
if (MulFlags & SCEV::FlagNUW)
Flags = (SCEV::NoWrapFlags)(Flags | SCEV::FlagNUW);
}
Constant *X = ConstantInt::get(
getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags);
}
break;
case Instruction::AShr: {
// AShr X, C, where C is a constant.
ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
if (!CI)
break;
Type *OuterTy = BO->LHS->getType();
uint64_t BitWidth = getTypeSizeInBits(OuterTy);
// If the shift count is not less than the bitwidth, the result of
// the shift is undefined. Don't try to analyze it, because the
// resolution chosen here may differ from the resolution chosen in
// other parts of the compiler.
if (CI->getValue().uge(BitWidth))
break;
if (CI->isZero())
return getSCEV(BO->LHS); // shift by zero --> noop
uint64_t AShrAmt = CI->getZExtValue();
Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
Operator *L = dyn_cast<Operator>(BO->LHS);
if (L && L->getOpcode() == Instruction::Shl) {
// X = Shl A, n
// Y = AShr X, m
// Both n and m are constant.
const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
if (L->getOperand(1) == BO->RHS)
// For a two-shift sext-inreg, i.e. n = m,
// use sext(trunc(x)) as the SCEV expression.
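// For example, (shl i32 %a, 24) ashr 24 is modeled as
// sext(trunc %a to i8) to i32.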
return getSignExtendExpr(
getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
uint64_t ShlAmt = ShlAmtCI->getZExtValue();
if (ShlAmt > AShrAmt) {
// When n > m, use sext(mul(trunc(x), 2^(n-m))) as the SCEV
// expression. We already checked that ShlAmt < BitWidth, so
// the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy because
// ShlAmt - AShrAmt < BitWidth - AShrAmt.
APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
ShlAmt - AShrAmt);
return getSignExtendExpr(
getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
getConstant(Mul)), OuterTy);
}
}
}
break;
}
}
}
switch (U->getOpcode()) {
case Instruction::Trunc:
return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::ZExt:
return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::SExt:
if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) {
// The NSW flag of a subtract does not always survive the conversion to
// A + (-1)*B. By pushing sign extension onto its operands we are much
// more likely to preserve NSW and allow later AddRec optimisations.
//
// NOTE: This is effectively duplicating this logic from getSignExtend:
// sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw>
// but by that point the NSW information has potentially been lost.
if (BO->Opcode == Instruction::Sub && BO->IsNSW) {
Type *Ty = U->getType();
auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty);
auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty);
return getMinusSCEV(V1, V2, SCEV::FlagNSW);
}
}
return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
case Instruction::BitCast:
// BitCasts are no-op casts so we just eliminate the cast.
if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
return getSCEV(U->getOperand(0));
break;
case Instruction::PtrToInt: {
// Pointer to integer cast is straightforward, so do model it.
const SCEV *Op = getSCEV(U->getOperand(0));
Type *DstIntTy = U->getType();
// But only if effective SCEV (integer) type is wide enough to represent
// all possible pointer values.
const SCEV *IntOp = getPtrToIntExpr(Op, DstIntTy);
if (isa<SCEVCouldNotCompute>(IntOp))
return getUnknown(V);
return IntOp;
}
case Instruction::IntToPtr:
// Just don't deal with inttoptr casts.
return getUnknown(V);
case Instruction::SDiv:
// If both operands are non-negative, this is just an udiv.
if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
isKnownNonNegative(getSCEV(U->getOperand(1))))
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
break;
case Instruction::SRem:
// If both operands are non-negative, this is just an urem.
if (isKnownNonNegative(getSCEV(U->getOperand(0))) &&
isKnownNonNegative(getSCEV(U->getOperand(1))))
return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)));
break;
case Instruction::GetElementPtr:
return createNodeForGEP(cast<GEPOperator>(U));
case Instruction::PHI:
return createNodeForPHI(cast<PHINode>(U));
case Instruction::Select:
// U can also be a select constant expr, which we let fall through. Since
// createNodeForSelect only works for a condition that is an `ICmpInst`, and
// constant expressions cannot have instructions as operands, we'd have
// returned getUnknown for a select constant expression anyway.
if (isa<Instruction>(U))
return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0),
U->getOperand(1), U->getOperand(2));
break;
case Instruction::Call:
case Instruction::Invoke:
if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand())
return getSCEV(RV);
if (auto *II = dyn_cast<IntrinsicInst>(U)) {
switch (II->getIntrinsicID()) {
case Intrinsic::abs:
return getAbsExpr(
getSCEV(II->getArgOperand(0)),
/*IsNSW=*/cast<ConstantInt>(II->getArgOperand(1))->isOne());
case Intrinsic::umax:
return getUMaxExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::umin:
return getUMinExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::smax:
return getSMaxExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::smin:
return getSMinExpr(getSCEV(II->getArgOperand(0)),
getSCEV(II->getArgOperand(1)));
case Intrinsic::usub_sat: {
const SCEV *X = getSCEV(II->getArgOperand(0));
const SCEV *Y = getSCEV(II->getArgOperand(1));
const SCEV *ClampedY = getUMinExpr(X, Y);
return getMinusSCEV(X, ClampedY, SCEV::FlagNUW);
}
case Intrinsic::uadd_sat: {
const SCEV *X = getSCEV(II->getArgOperand(0));
const SCEV *Y = getSCEV(II->getArgOperand(1));
const SCEV *ClampedX = getUMinExpr(X, getNotSCEV(Y));
return getAddExpr(ClampedX, Y, SCEV::FlagNUW);
}
case Intrinsic::start_loop_iterations:
// A start_loop_iterations is just equivalent to the first operand for
// SCEV purposes.
return getSCEV(II->getArgOperand(0));
default:
break;
}
}
break;
}
return getUnknown(V);
}
//===----------------------------------------------------------------------===//
// Iteration Count Computation Code
//
const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) {
// Get the trip count from the BE count by adding 1. Overflow results
// in zero, which means "unknown".
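// For example, a backedge-taken count of 7 gives a trip count of 8, while a
// backedge-taken count of all-ones wraps around to 0 ("unknown").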
return getAddExpr(ExitCount, getOne(ExitCount->getType()));
}
static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
if (!ExitCount)
return 0;
ConstantInt *ExitConst = ExitCount->getValue();
// Guard against huge trip counts.
if (ExitConst->getValue().getActiveBits() > 32)
return 0;
// In case of integer overflow, this returns 0, which is correct.
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
return getConstantTripCount(ExitCount);
}
unsigned
ScalarEvolution::getSmallConstantTripCount(const Loop *L,
const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
return getConstantTripCount(ExitCount);
}
unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
Optional<unsigned> Res = None;
for (auto *ExitingBB : ExitingBlocks) {
unsigned Multiple = getSmallConstantTripMultiple(L, ExitingBB);
if (!Res)
Res = Multiple;
Res = (unsigned)GreatestCommonDivisor64(*Res, Multiple);
}
return Res.getValueOr(1);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
const SCEV *ExitCount) {
if (ExitCount == getCouldNotCompute())
return 1;
// Get the trip count
const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);
const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
if (!TC)
// Attempt to factor more general cases. Returns the greatest power of
// two divisor. If overflow happens, the trip count expression is still
// divisible by the greatest power of 2 divisor returned.
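// For example, if the trip count expression is 12 * %n, it has at least two
// known trailing zero bits, so this returns 4 (assuming nothing more is
// known about %n).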
return 1U << std::min((uint32_t)31,
GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));
ConstantInt *Result = TC->getValue();
// Guard against huge trip counts (this requires checking
// for zero to handle the case where the trip count == -1 and the
// addition wraps).
if (!Result || Result->getValue().getActiveBits() > 32 ||
Result->getValue().getActiveBits() == 0)
return 1;
return (unsigned)Result->getZExtValue();
}
/// Returns the largest constant divisor of the trip count of this loop as a
/// normal unsigned value, if possible. This means that the actual trip count is
/// always a multiple of the returned value (don't forget the trip count could
/// very well be zero as well!).
///
/// Returns 1 if the trip count is unknown or not guaranteed to be a
/// multiple of a constant (which is also the case if the trip count is simply
/// constant; use getSmallConstantTripCount for that case). It will also return
/// 1 if the trip count is very large (>= 2^32).
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned
ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
"Exiting block must actually branch out of the loop!");
const SCEV *ExitCount = getExitCount(L, ExitingBlock);
return getSmallConstantTripMultiple(L, ExitCount);
}
const SCEV *ScalarEvolution::getExitCount(const Loop *L,
const BasicBlock *ExitingBlock,
ExitCountKind Kind) {
switch (Kind) {
case Exact:
case SymbolicMaximum:
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
case ConstantMaximum:
return getBackedgeTakenInfo(L).getConstantMax(ExitingBlock, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
const SCEV *
ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
SCEVUnionPredicate &Preds) {
return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds);
}
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L,
ExitCountKind Kind) {
switch (Kind) {
case Exact:
return getBackedgeTakenInfo(L).getExact(L, this);
case ConstantMaximum:
return getBackedgeTakenInfo(L).getConstantMax(this);
case SymbolicMaximum:
return getBackedgeTakenInfo(L).getSymbolicMax(L, this);
};
llvm_unreachable("Invalid ExitCountKind!");
}
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (PHINode &PN : Header->phis())
Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
auto &BTI = getBackedgeTakenInfo(L);
if (BTI.hasFullInfo())
return BTI;
auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
if (!Pair.second)
return Pair.first->second;
BackedgeTakenInfo Result =
computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
if (!Pair.second)
return Pair.first->second;
// computeBackedgeTakenCount may allocate memory for its result. Inserting it
// into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
// must be cleared in this scope.
BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
// In a production build, these statistics are unused.
(void)NumTripCountsComputed;
(void)NumTripCountsNotComputed;
#if LLVM_ENABLE_STATS || !defined(NDEBUG)
const SCEV *BEExact = Result.getExact(L, this);
if (BEExact != getCouldNotCompute()) {
assert(isLoopInvariant(BEExact, L) &&
isLoopInvariant(Result.getConstantMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
} else if (Result.getConstantMax(this) == getCouldNotCompute() &&
isa<PHINode>(L->getHeader()->begin())) {
// Only count loops that have phi nodes as not being computable.
++NumTripCountsNotComputed;
}
#endif // LLVM_ENABLE_STATS || !defined(NDEBUG)
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
// information. This is similar to the code in forgetLoop, except that
// it handles SCEVUnknown PHI nodes specially.
if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
SmallPtrSet<Instruction *, 8> Discovered;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, or it's a PHI that's in the process of being computed
// by createNodeForPHI. In the former case, additional loop trip
// count information isn't going to change anything. In the latter
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
// Since we don't need to invalidate anything for correctness and we're
// only invalidating to make SCEV's results more precise, we get to stop
// early to avoid invalidating too much. This is especially important in
// cases like:
//
// %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
// loop0:
// %pn0 = phi
// ...
// loop1:
// %pn1 = phi
// ...
//
// where both loop0 and loop1's backedge taken count uses the SCEV
// expression for %v. If we don't have the early stop below then in cases
// like the above, getBackedgeTakenInfo(loop1) will clear out the trip
// count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
// count for loop1, effectively nullifying SCEV's trip count cache.
for (auto *U : I->users())
if (auto *I = dyn_cast<Instruction>(U)) {
auto *LoopForUser = LI.getLoopFor(I->getParent());
if (LoopForUser && L->contains(LoopForUser) &&
Discovered.insert(I).second)
Worklist.push_back(I);
}
}
}
// Re-lookup the insert position, since the call to
// computeBackedgeTakenCount above could result in a
// recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetAllLoops() {
// This method is intended to forget all info about loops. It should
// invalidate caches as if the following happened:
// - The trip counts of all loops have changed arbitrarily
// - Every llvm::Value has been updated in place to produce a different
// result.
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
LoopPropertiesCache.clear();
ConstantEvolutionLoopExitValue.clear();
ValueExprMap.clear();
ValuesAtScopes.clear();
LoopDispositions.clear();
BlockDispositions.clear();
UnsignedRanges.clear();
SignedRanges.clear();
ExprValueMap.clear();
HasRecMap.clear();
MinTrailingZerosCache.clear();
PredicatedSCEVRewrites.clear();
}
void ScalarEvolution::forgetLoop(const Loop *L) {
SmallVector<const Loop *, 16> LoopWorklist(1, L);
SmallVector<Instruction *, 32> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
// Iterate over all the loops and sub-loops to drop SCEV information.
while (!LoopWorklist.empty()) {
auto *CurrL = LoopWorklist.pop_back_val();
// Drop any stored trip count value.
BackedgeTakenCounts.erase(CurrL);
PredicatedBackedgeTakenCounts.erase(CurrL);
// Drop information about predicated SCEV rewrites for this loop.
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
if (Entry.second == CurrL)
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
auto LoopUsersItr = LoopUsers.find(CurrL);
if (LoopUsersItr != LoopUsers.end()) {
for (auto *S : LoopUsersItr->second)
forgetMemoizedResults(S);
LoopUsers.erase(LoopUsersItr);
}
// Drop information about expressions based on loop-header PHIs.
PushLoopPHIs(CurrL, Worklist);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
LoopPropertiesCache.erase(CurrL);
// Forget all contained loops too, to avoid dangling entries in the
// ValuesAtScopes map.
LoopWorklist.append(CurrL->begin(), CurrL->end());
}
}
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
while (Loop *Parent = L->getParentLoop())
L = Parent;
forgetLoop(L);
}
void ScalarEvolution::forgetValue(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
Worklist.push_back(I);
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
if (!Visited.insert(I).second)
continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
PushDefUseChildren(I, Worklist);
}
}
void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
LoopDispositions.clear();
}
/// Get the exact loop backedge taken count considering all loop exits. A
/// computable result can only be returned for loops with all exiting blocks
/// dominating the latch. howFarToZero assumes that the limit of each loop test
/// is never skipped. This is a valid assumption as long as the loop exits via
/// that test. For precise results, it is the caller's responsibility to specify
/// the relevant loop exiting block using getExact(ExitingBlock, SE).
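///
/// For illustration: if a loop has two exiting blocks, both dominating the
/// latch, whose exits are taken after 5 and 7 backedges respectively, the
/// exact backedge-taken count computed below is umin(5, 7) = 5.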
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
if (!isComplete() || ExitNotTaken.empty())
return SE->getCouldNotCompute();
const BasicBlock *Latch = L->getLoopLatch();
// All exiting blocks we have collected must dominate the only backedge.
if (!Latch)
return SE->getCouldNotCompute();
// All exiting blocks we have gathered dominate the loop's latch, so the exact
// trip count is simply the minimum of all these calculated exit counts.
SmallVector<const SCEV *, 2> Ops;
for (auto &ENT : ExitNotTaken) {
const SCEV *BECount = ENT.ExactNotTaken;
assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
"We should only have known counts for exiting blocks that dominate "
"latch!");
Ops.push_back(BECount);
if (Preds && !ENT.hasAlwaysTruePredicate())
Preds->add(ENT.Predicate.get());
assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
return SE->getUMinFromMismatchedTypes(Ops);
}
/// Get the exact not taken count for this loop exit.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
return SE->getCouldNotCompute();
}
const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.MaxNotTaken;
return SE->getCouldNotCompute();
}
/// getConstantMax - Get the constant max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax())
return SE->getCouldNotCompute();
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
return getConstantMax();
}
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L,
ScalarEvolution *SE) {
if (!SymbolicMax)
SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L);
return SymbolicMax;
}
bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
ScalarEvolution *SE) const {
auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
return !ENT.hasAlwaysTruePredicate();
};
return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S) const {
return Operands.contains(S);
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
: ExitLimit(E, E, false, None) {
}
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
: ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
isa<SCEVConstant>(MaxNotTaken)) &&
"No point in having a non-constant max backedge taken count!");
for (auto *PredSet : PredSetList)
for (auto *P : *PredSet)
addPredicate(P);
assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
"Backedge count should be int");
assert((isa<SCEVCouldNotCompute>(M) || !M->getType()->isPointerTy()) &&
"Max backedge count should be int");
}
ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
: ExitLimit(E, M, MaxOrZero, {&PredSet}) {
}
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
bool MaxOrZero)
: ExitLimit(E, M, MaxOrZero, None) {
}
class SCEVRecordOperands {
SmallPtrSetImpl<const SCEV *> &Operands;
public:
SCEVRecordOperands(SmallPtrSetImpl<const SCEV *> &Operands)
: Operands(Operands) {}
bool follow(const SCEV *S) {
Operands.insert(S);
return true;
}
bool isDone() { return false; }
};
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts,
bool IsComplete, const SCEV *ConstantMax, bool MaxOrZero)
: ConstantMax(ConstantMax), IsComplete(IsComplete), MaxOrZero(MaxOrZero) {
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
ExitNotTaken.reserve(ExitCounts.size());
std::transform(
ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
[&](const EdgeExitInfo &EEI) {
BasicBlock *ExitBB = EEI.first;
const ExitLimit &EL = EEI.second;
if (EL.Predicates.empty())
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
nullptr);
std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
for (auto *Pred : EL.Predicates)
Predicate->add(Pred);
return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
std::move(Predicate));
});
assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
isa<SCEVConstant>(ConstantMax)) &&
"No point in having a non-constant max backedge taken count!");
SCEVRecordOperands RecordOperands(Operands);
SCEVTraversal<SCEVRecordOperands> ST(RecordOperands);
if (!isa<SCEVCouldNotCompute>(ConstantMax))
ST.visitAll(ConstantMax);
for (auto &ENT : ExitNotTaken)
if (!isa<SCEVCouldNotCompute>(ENT.ExactNotTaken))
ST.visitAll(ENT.ExactNotTaken);
}
/// Compute the number of times the backedge of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
bool AllowPredicates) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
// Do a union of all the predicates here.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitingBlocks[i];
// We canonicalize untaken exits to br (constant); ignore them so that
// proving an exit untaken doesn't negatively impact our ability to reason
// about the loop as a whole.
if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
continue;
}
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
assert((AllowPredicates || EL.Predicates.empty()) &&
"Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
if (EL.ExactNotTaken == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
ExitCounts.emplace_back(ExitBB, EL);
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
// LoopMustExits and LoopMayExits.
//
// If the exit dominates the loop latch, it is a LoopMustExit; otherwise it
// is a LoopMayExit. If any computable LoopMustExit is found, then
// MaxBECount is the minimum EL.MaxNotTaken of computable
// LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
// EL.MaxNotTaken, where CouldNotCompute is considered greater than any
// computable EL.MaxNotTaken.
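//
// For illustration: with two computable exits whose EL.MaxNotTaken values
// are 10 and 20, MaxBECount is umin(10, 20) = 10 if both exits dominate the
// latch, and conservatively umax(10, 20) = 20 if neither does.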
if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount) {
MustExitMaxBECount = EL.MaxNotTaken;
MustExitMaxOrZero = EL.MaxOrZero;
} else {
MustExitMaxBECount =
getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
} else if (MayExitMaxBECount != getCouldNotCompute()) {
if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
MayExitMaxBECount = EL.MaxNotTaken;
else {
MayExitMaxBECount =
getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
}
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
// The loop backedge will be taken the maximum or zero times if there's
// a single exit that must be taken the maximum or zero times.
bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
MaxBECount, MaxOrZero);
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
bool AllowPredicates) {
assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
// If our exiting block does not dominate the latch, then its connection with
// the loop's exit limit may be far from trivial.
const BasicBlock *Latch = L->getLoopLatch();
if (!Latch || !DT.dominates(ExitingBlock, Latch))
return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
Instruction *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
"It should have one successor in loop and one exit block!");
// Proceed to the next level to examine the exit condition expression.
return computeExitLimitFromCond(
L, BI->getCondition(), ExitIfTrue,
/*ControlsExit=*/IsOnlyExit, AllowPredicates);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
// For switch, make sure that there is a single exit from the loop.
BasicBlock *Exit = nullptr;
for (auto *SBB : successors(ExitingBlock))
if (!L->contains(SBB)) {
if (Exit) // Multiple exit successors.
return getCouldNotCompute();
Exit = SBB;
}
assert(Exit && "Exiting block must have at least one exit");
return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
/*ControlsExit=*/IsOnlyExit);
}
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
}
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
bool ExitIfTrue, bool ControlsExit,
bool AllowPredicates) {
(void)this->L;
(void)this->ExitIfTrue;
(void)this->AllowPredicates;
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto Itr = TripCountMap.find({ExitCond, ControlsExit});
if (Itr == TripCountMap.end())
return None;
return Itr->second;
}
void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates,
const ExitLimit &EL) {
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
assert(InsertResult.second && "Expected successful insertion!");
(void)InsertResult;
(void)ExitIfTrue;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
if (auto MaybeEL =
Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
return *MaybeEL;
ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
ControlsExit, AllowPredicates);
Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
return EL;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
// Handle BinOp conditions (And, Or).
if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
return *LimitFromBinOp;
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
ExitLimit EL =
computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit);
if (EL.hasFullInfo() || !AllowPredicates)
return EL;
// Try again, but use SCEV predicates this time.
return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit,
/*AllowPredicates=*/true);
}
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
// preserve the CFG and is temporarily leaving constant conditions
// in place.
if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
if (ExitIfTrue == !CI->getZExtValue())
// The backedge is always taken.
return getCouldNotCompute();
else
// The backedge is never taken.
return getZero(CI->getType());
}
// If it's not an integer or pointer comparison then compute it the hard way.
return computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
}
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::computeExitLimitFromCondFromBinOp(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
bool ControlsExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
Value *Op0, *Op1;
bool IsAnd = false;
if (match(ExitCond, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
IsAnd = true;
else if (match(ExitCond, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
IsAnd = false;
else
return None;
// EitherMayExit is true in these two cases:
// br (and Op0 Op1), loop, exit
// br (or Op0 Op1), exit, loop
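// For instance, 'br (and Op0 Op1), loop, exit' has IsAnd == true and
// ExitIfTrue == false, so EitherMayExit is true: the loop exits as soon as
// either operand becomes false.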
bool EitherMayExit = IsAnd ^ ExitIfTrue;
ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue,
ControlsExit && !EitherMayExit,
AllowPredicates);
ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue,
ControlsExit && !EitherMayExit,
AllowPredicates);
// Be robust against unsimplified IR for the form "op i1 X, NeutralElement"
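// (e.g. for 'and i1 %X, true' the limit is simply that of %X, while for
// 'and i1 %X, false' the limit is that of the constant operand).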
const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd);
if (isa<ConstantInt>(Op1))
return Op1 == NeutralElement ? EL0 : EL1;
if (isa<ConstantInt>(Op0))
return Op0 == NeutralElement ? EL1 : EL0;
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
// The loop keeps executing only while neither condition triggers an exit,
// so choose the less conservative count.
// If ExitCond is a short-circuit form (select), using
// umin(EL0.ExactNotTaken, EL1.ExactNotTaken) is unsafe in general.
// To see the detailed examples, please see
// test/Analysis/ScalarEvolution/exit-count-select.ll
bool PoisonSafe = isa<BinaryOperator>(ExitCond);
if (!PoisonSafe)
// Even if ExitCond is select, we can safely derive BECount using both
// EL0 and EL1 in these cases:
// (1) EL0.ExactNotTaken is non-zero
// (2) EL1.ExactNotTaken is non-poison
// (3) EL0.ExactNotTaken is zero (BECount should be simply zero and
// it cannot be umin(0, ..))
// The PoisonSafe assignment below is simplified and the assertion after
// BECount calculation fully guarantees the condition (3).
PoisonSafe = isa<SCEVConstant>(EL0.ExactNotTaken) ||
isa<SCEVConstant>(EL1.ExactNotTaken);
if (EL0.ExactNotTaken != getCouldNotCompute() &&
EL1.ExactNotTaken != getCouldNotCompute() && PoisonSafe) {
BECount =
getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
// If EL0.ExactNotTaken was zero and ExitCond was a short-circuit form,
// it should have been simplified to zero (see the condition (3) above)
assert(!isa<BinaryOperator>(ExitCond) || !EL0.ExactNotTaken->isZero() ||
BECount->isZero());
}
if (EL0.MaxNotTaken == getCouldNotCompute())
MaxBECount = EL1.MaxNotTaken;
else if (EL1.MaxNotTaken == getCouldNotCompute())
MaxBECount = EL0.MaxNotTaken;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must trigger the exit at the same time for the loop to
// exit. For now, be conservative.
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
BECount = EL0.ExactNotTaken;
}
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
// MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
// EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
// to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{ &EL0.Predicates, &EL1.Predicates });
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
bool ExitIfTrue,
bool ControlsExit,
bool AllowPredicates) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Pred;
if (!ExitIfTrue)
Pred = ExitCond->getPredicate();
else
Pred = ExitCond->getInversePredicate();
const ICmpInst::Predicate OriginalPred = Pred;
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
ExitLimit ItCnt =
computeLoadConstantCompareExitLimit(LI, RHS, L, Pred);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
RHS = getSCEVAtScope(RHS, L);
// At this point, we would like to compute how many iterations of the
// loop the predicate will return true for these inputs.
if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
// If there is a loop-invariant, force it into the RHS.
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
// Simplify the operands before analyzing them.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
ConstantRange CompRange =
ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
switch (Pred) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
if (LHS->getType()->isPointerTy()) {
LHS = getLosslessPtrToIntExpr(LHS);
if (isa<SCEVCouldNotCompute>(LHS))
return LHS;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
if (LHS->getType()->isPointerTy()) {
LHS = getLosslessPtrToIntExpr(LHS);
if (isa<SCEVCouldNotCompute>(LHS))
return LHS;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Pred == ICmpInst::ICMP_SLT;
ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Pred == ICmpInst::ICMP_SGT;
ExitLimit EL =
howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
AllowPredicates);
if (EL.hasAnyInfo()) return EL;
break;
}
default:
break;
}
auto *ExhaustiveCount =
computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
if (!isa<SCEVCouldNotCompute>(ExhaustiveCount))
return ExhaustiveCount;
return computeShiftCompareExitLimit(ExitCond->getOperand(0),
ExitCond->getOperand(1), L, OriginalPred);
}
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
bool ControlsExit) {
assert(!L->contains(ExitingBlock) && "Not an exiting block!");
// Give up if the exit is the default dest of a switch.
if (Switch->getDefaultDest() == ExitingBlock)
return getCouldNotCompute();
assert(L->contains(Switch->getDefaultDest()) &&
"Default case must not exit the loop!");
const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
// while (X != Y) --> while (X-Y != 0)
ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo())
return EL;
return getCouldNotCompute();
}
static ConstantInt *
EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
ScalarEvolution &SE) {
const SCEV *InVal = SE.getConstant(C);
const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
assert(isa<SCEVConstant>(Val) &&
"Evaluation of SCEV at constant didn't fold correctly?");
return cast<SCEVConstant>(Val)->getValue();
}
/// Given an exit condition of 'icmp op load X, cst', try to see if we can
/// compute the backedge execution count.
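///
/// For illustration (hypothetical names): a loop such as
///   for (i = 0; table[i] != 0; ++i)
/// where 'table' is a constant global array with a definitive initializer is
/// handled by folding each iteration's load through the GEP indices until the
/// terminating comparison is found, or MaxBruteForceIterations is exceeded.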
ScalarEvolution::ExitLimit
ScalarEvolution::computeLoadConstantCompareExitLimit(
LoadInst *LI,
Constant *RHS,
const Loop *L,
ICmpInst::Predicate predicate) {
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
// TODO: Use SCEV instead of manually grubbing with GEPs.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
if (!GEP) return getCouldNotCompute();
// Make sure that it is really a constant global we are gepping, with an
// initializer, and make sure the first IDX is really 0.
GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
!cast<Constant>(GEP->getOperand(1))->isNullValue())
return getCouldNotCompute();
// Okay, we allow one non-constant index into the GEP instruction.
Value *VarIdx = nullptr;
std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
Indexes.push_back(CI);
} else if (!isa<ConstantInt>(GEP->getOperand(i))) {
if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
VarIdx = GEP->getOperand(i);
VarIdxNum = i-2;
Indexes.push_back(nullptr);
}
// Loop-invariant loads may be a byproduct of loop optimization. Skip them.
if (!VarIdx)
return getCouldNotCompute();
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
Idx = getSCEVAtScope(Idx, L);
// We can only recognize very limited forms of loop index expressions, in
// particular, only affine AddRec's like {C1,+,C2}<L>.
const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
if (!IdxExpr || IdxExpr->getLoop() != L || !IdxExpr->isAffine() ||
isLoopInvariant(IdxExpr, L) ||
!isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
!isa<SCEVConstant>(IdxExpr->getOperand(1)))
return getCouldNotCompute();
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
ConstantInt *ItCst = ConstantInt::get(
cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
Indexes);
if (!Result) break; // Cannot compute!
// Evaluate the condition for this iteration.
Result = ConstantExpr::getICmp(predicate, Result, RHS);
if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
++NumArrayLenItCounts;
return getConstant(ItCst); // Found terminating iteration!
}
}
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
if (!RHS)
return getCouldNotCompute();
const BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return getCouldNotCompute();
const BasicBlock *Predecessor = L->getLoopPredecessor();
if (!Predecessor)
return getCouldNotCompute();
// Return true if V is of the form "LHS `shift_op` <positive constant>".
// Return LHS in OutLHS and shift_op in OutOpCode.
auto MatchPositiveShift =
[](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) {
using namespace PatternMatch;
ConstantInt *ShiftAmt;
if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::LShr;
else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::AShr;
else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt))))
OutOpCode = Instruction::Shl;
else
return false;
return ShiftAmt->getValue().isStrictlyPositive();
};
// Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in
//
// loop:
// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
// %iv.shifted = lshr i32 %iv, <positive constant>
//
// Return true on a successful match. Return the corresponding PHI node (%iv
// above) in PNOut and the opcode of the shift operation in OpCodeOut.
auto MatchShiftRecurrence =
[&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
Optional<Instruction::BinaryOps> PostShiftOpCode;
{
Instruction::BinaryOps OpC;
Value *V;
// If we encounter a shift instruction, "peel off" the shift operation,
// and remember that we did so. Later when we inspect %iv's backedge
// value, we will make sure that the backedge value uses the same
// operation.
//
// Note: the peeled shift operation does not have to be the same
// instruction as the one feeding into the PHI's backedge value. We only
// really care about it being the same *kind* of shift instruction --
// that's all that is required for our later inferences to hold.
if (MatchPositiveShift(LHS, V, OpC)) {
PostShiftOpCode = OpC;
LHS = V;
}
}
PNOut = dyn_cast<PHINode>(LHS);
if (!PNOut || PNOut->getParent() != L->getHeader())
return false;
Value *BEValue = PNOut->getIncomingValueForBlock(Latch);
Value *OpLHS;
return
// The backedge value for the PHI node must be a shift by a positive
// amount
MatchPositiveShift(BEValue, OpLHS, OpCodeOut) &&
// of the PHI node itself
OpLHS == PNOut &&
// and the kind of shift should match the kind of shift we peeled
// off, if any.
(!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut);
};
PHINode *PN;
Instruction::BinaryOps OpCode;
if (!MatchShiftRecurrence(LHS, PN, OpCode))
return getCouldNotCompute();
const DataLayout &DL = getDataLayout();
// The key rationale for this optimization is that for some kinds of shift
// recurrences, the value of the recurrence "stabilizes" to either 0 or -1
// within a finite number of iterations. If the condition guarding the
// backedge (in the sense that the backedge is taken if the condition is true)
// is false for the value the shift recurrence stabilizes to, then we know
// that the backedge is taken only a finite number of times.
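// For example, {-8,ashr,1} evolves as -8, -4, -2, -1, -1, ... and stabilizes
// to -1, while {8,lshr,1} evolves as 8, 4, 2, 1, 0, 0, ... and stabilizes to
// 0, in each case within bitwidth-many iterations.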
ConstantInt *StableValue = nullptr;
switch (OpCode) {
default:
llvm_unreachable("Impossible case!");
case Instruction::AShr: {
// {K,ashr,<positive-constant>} stabilizes to signum(K) in at most
// bitwidth(K) iterations.
Value *FirstValue = PN->getIncomingValueForBlock(Predecessor);
KnownBits Known = computeKnownBits(FirstValue, DL, 0, &AC,
Predecessor->getTerminator(), &DT);
auto *Ty = cast<IntegerType>(RHS->getType());
if (Known.isNonNegative())
StableValue = ConstantInt::get(Ty, 0);
else if (Known.isNegative())
StableValue = ConstantInt::get(Ty, -1, true);
else
return getCouldNotCompute();
break;
}
case Instruction::LShr:
case Instruction::Shl:
// Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>}
// stabilize to 0 in at most bitwidth(K) iterations.
StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0);
break;
}
auto *Result =
ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI);
assert(Result->getType()->isIntegerTy(1) &&
"Otherwise cannot be an operand to a branch instruction");
if (Result->isZeroValue()) {
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
}
/// Return true if we can constant fold an instruction of the specified type,
/// assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
isa<LoadInst>(I) || isa<ExtractValueInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction())
return canConstantFoldCallTo(CI, F);
return false;
}
/// Determine whether this instruction can constant evolve within this loop
/// assuming its operands can all constant evolve.
static bool canConstantEvolve(Instruction *I, const Loop *L) {
// An instruction outside of the loop can't be derived from a loop PHI.
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
// We don't currently keep track of the control flow needed to evaluate
// PHIs, so we cannot handle PHIs inside of loops.
return L->getHeader() == I->getParent();
}
// If we won't be able to constant fold this expression even if the operands
// are constants, bail early.
return CanConstantFold(I);
}
/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
DenseMap<Instruction *, PHINode *> &PHIMap,
unsigned Depth) {
if (Depth > MaxConstantEvolvingDepth)
return nullptr;
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
PHINode *PHI = nullptr;
for (Value *Op : UseInst->operands()) {
if (isa<Constant>(Op)) continue;
Instruction *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
PHINode *P = dyn_cast<PHINode>(OpInst);
if (!P)
// If this operand is already visited, reuse the prior result.
// We may have P != PHI if this is the deepest point at which the
// inconsistent paths meet.
P = PHIMap.lookup(OpInst);
if (!P) {
// Recurse and memoize the results, whether a phi is found or not.
// This recursive call invalidates pointers into PHIMap.
P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
PHIMap[OpInst] = P;
}
if (!P)
return nullptr; // Not evolving from PHI
if (PHI && PHI != P)
return nullptr; // Evolving from multiple different PHIs.
PHI = P;
}
// This is an expression evolving from a constant PHI!
return PHI;
}
/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
/// in the loop that V is derived from. We allow arbitrary operations along the
/// way, but the operands of an operation must either be constants or a value
/// derived from a constant PHI. If this expression does not fit with these
/// constraints, return null.
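///
/// For illustration, given IR of the form
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
///   %iv.next = add i32 %iv, 1
///   %cmp = icmp eq i32 %iv.next, 10
/// this returns %iv for %cmp, since %cmp is derived only from %iv and
/// constants.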
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I || !canConstantEvolve(I, L)) return nullptr;
if (PHINode *PN = dyn_cast<PHINode>(I))
return PN;
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the loop-header
/// PHI nodes have the constant values recorded in Vals. If we can't fold this
/// expression for some reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return nullptr;
if (Constant *C = Vals.lookup(I)) return C;
// An instruction inside the loop depends on a value outside the loop that we
// weren't given a mapping for, or a value such as a call inside the loop.
if (!canConstantEvolve(I, L)) return nullptr;
// An unmapped PHI can be due to a branch or another loop inside this loop,
// or due to this not being the initial iteration through a loop where we
// couldn't compute the evolution of this particular PHI last time.
if (isa<PHINode>(I)) return nullptr;
std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
if (!Operand) {
Operands[i] = dyn_cast<Constant>(I->getOperand(i));
if (!Operands[i]) return nullptr;
continue;
}
Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
Vals[Operand] = C;
if (!C) return nullptr;
Operands[i] = C;
}
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
}
return ConstantFoldInstOperands(I, Operands, DL, TLI);
}
// If every incoming value to PN except the one for BB is a specific Constant,
// return that, else return nullptr.
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
Constant *IncomingVal = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingBlock(i) == BB)
continue;
auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
if (!CurrentVal)
return nullptr;
if (IncomingVal != CurrentVal) {
if (IncomingVal)
return nullptr;
IncomingVal = CurrentVal;
}
}
return IncomingVal;
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
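///
/// For illustration: for %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
/// with %iv.next = add i32 %iv, 2 and a backedge-taken count of 5, the
/// symbolic execution below yields an exit value of 10 for %iv.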
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
auto I = ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
if (BEs.ugt(MaxBruteForceIterations))
return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return nullptr;
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
Value *BEValue = PN->getIncomingValueForBlock(Latch);
// Execute the loop symbolically to determine the exit value.
assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
"BEs is <= MaxBruteForceIterations which is an 'unsigned'!");
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
const DataLayout &DL = getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI =
EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
// Also evaluate the other PHI nodes. However, we don't get to stop if we
// cease to be able to evaluate one of them or if they stop evolving,
// because that doesn't necessarily prevent us from computing PN.
SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
for (const auto &I : CurrentIterVals) {
PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
PHIsToCompute.emplace_back(PHI, I.second);
}
// We use two distinct loops because EvaluateExpression may invalidate any
// iterators into CurrentIterVals.
for (const auto &I : PHIsToCompute) {
PHINode *PHI = I.first;
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
Value *BEValue = PHI->getIncomingValueForBlock(Latch);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
if (NextPHI != I.second)
StoppedEvolving = false;
}
// If all entries in CurrentIterVals == NextIterVals then we can stop
// iterating, the loop can't continue to change.
if (StoppedEvolving)
return RetVal = CurrentIterVals[PN];
CurrentIterVals.swap(NextIterVals);
}
}
const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (!PN) return getCouldNotCompute();
// If the loop is canonicalized, the PHI will have exactly two entries.
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
DenseMap<Instruction *, Constant *> CurrentIterVals;
BasicBlock *Header = L->getHeader();
assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Should follow from NumIncomingValues == 2!");
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return getCouldNotCompute();
// Okay, we found a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
const DataLayout &DL = getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
auto *CondVal = dyn_cast_or_null<ConstantInt>(
EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
if (CondVal->getValue() == uint64_t(ExitWhen)) {
++NumBruteForceTripCountsComputed;
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
// Update all the PHI nodes for the next iteration.
DenseMap<Instruction *, Constant *> NextIterVals;
// Create a list of which PHIs we need to compute. We want to do this before
// calling EvaluateExpression on them because that may invalidate iterators
// into CurrentIterVals.
SmallVector<PHINode *, 8> PHIsToCompute;
for (const auto &I : CurrentIterVals) {
PHINode *PHI = dyn_cast<PHINode>(I.first);
if (!PHI || PHI->getParent() != Header) continue;
PHIsToCompute.push_back(PHI);
}
for (PHINode *PHI : PHIsToCompute) {
Constant *&NextPHI = NextIterVals[PHI];
if (NextPHI) continue; // Already computed!
Value *BEValue = PHI->getIncomingValueForBlock(Latch);
NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
}
CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
return getCouldNotCompute();
}
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
ValuesAtScopes[V];
// Check to see if we've folded this expression at this loop before.
for (auto &LS : Values)
if (LS.first == L)
return LS.second ? LS.second : V;
Values.emplace_back(L, nullptr);
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
for (auto &LS : reverse(ValuesAtScopes[V]))
if (LS.first == L) {
LS.second = C;
break;
}
return C;
}
/// This builds up a Constant using the ConstantExpr interface. That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
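///
/// For illustration: an add expression of a global pointer and the constant
/// 16 is rebuilt below as a getelementptr over i8 with a byte offset of 16,
/// whereas any expression containing an addrec yields null.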
static Constant *BuildConstantFromSCEV(const SCEV *V) {
switch (V->getSCEVType()) {
case scCouldNotCompute:
case scAddRecExpr:
return nullptr;
case scConstant:
return cast<SCEVConstant>(V)->getValue();
case scUnknown:
return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
case scSignExtend: {
const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
return ConstantExpr::getSExt(CastOp, SS->getType());
return nullptr;
}
case scZeroExtend: {
const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
return ConstantExpr::getZExt(CastOp, SZ->getType());
return nullptr;
}
case scPtrToInt: {
const SCEVPtrToIntExpr *P2I = cast<SCEVPtrToIntExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(P2I->getOperand()))
return ConstantExpr::getPtrToInt(CastOp, P2I->getType());
return nullptr;
}
case scTruncate: {
const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
return ConstantExpr::getTrunc(CastOp, ST->getType());
return nullptr;
}
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
unsigned AS = PTy->getAddressSpace();
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2)
return nullptr;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
// statically compute a load that results from it anyway.
if (C2->getType()->isPointerTy())
return nullptr;
if (C->getType()->isPointerTy()) {
C = ConstantExpr::getGetElementPtr(Type::getInt8Ty(C->getContext()),
C, C2);
} else {
C = ConstantExpr::getAdd(C, C2);
}
}
return C;
}
return nullptr;
}
case scMulExpr: {
const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
// Don't bother with pointers at all.
if (C->getType()->isPointerTy())
return nullptr;
for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
if (!C2 || C2->getType()->isPointerTy())
return nullptr;
C = ConstantExpr::getMul(C, C2);
}
return C;
}
return nullptr;
}
case scUDivExpr: {
const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
if (LHS->getType() == RHS->getType())
return ConstantExpr::getUDiv(LHS, RHS);
return nullptr;
}
case scSMaxExpr:
case scUMaxExpr:
case scSMinExpr:
case scUMinExpr:
return nullptr; // TODO: smax, umax, smin, umin.
}
llvm_unreachable("Unknown SCEV kind!");
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
// If this instruction is evolved from a constant-evolving PHI, compute the
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
const Loop *CurrLoop = this->LI[I->getParent()];
// Looking for loop exit value.
if (CurrLoop && CurrLoop->getParentLoop() == L &&
PN->getParent() == CurrLoop->getHeader()) {
// Okay, there is no closed form solution for the PHI node. Check
// to see if the loop that contains it has a known backedge-taken
// count. If so, we may be able to force computation of the exit
// value.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(CurrLoop);
// This trivial case can show up in some degenerate cases where
// the incoming IR has not yet been fully simplified.
if (BackedgeTakenCount->isZero()) {
Value *InitValue = nullptr;
bool MultipleInitValues = false;
for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
if (!CurrLoop->contains(PN->getIncomingBlock(i))) {
if (!InitValue)
InitValue = PN->getIncomingValue(i);
else if (InitValue != PN->getIncomingValue(i)) {
MultipleInitValues = true;
break;
}
}
}
if (!MultipleInitValues && InitValue)
return getSCEV(InitValue);
}
// Do we have a loop invariant value flowing around the backedge
// for a loop which must execute the backedge?
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
isKnownPositive(BackedgeTakenCount) &&
PN->getNumIncomingValues() == 2) {
unsigned InLoopPred =
CurrLoop->contains(PN->getIncomingBlock(0)) ? 0 : 1;
Value *BackedgeVal = PN->getIncomingValue(InLoopPred);
if (CurrLoop->isLoopInvariant(BackedgeVal))
return getSCEV(BackedgeVal);
}
if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
Constant *RV = getConstantEvolutionLoopExitValue(
PN, BTCC->getAPInt(), CurrLoop);
if (RV) return getSCEV(RV);
}
}
// If there is a single-input Phi, evaluate it at our scope. If we can
// prove that this replacement does not break LCSSA form, use new value.
if (PN->getNumOperands() == 1) {
const SCEV *Input = getSCEV(PN->getOperand(0));
const SCEV *InputAtScope = getSCEVAtScope(Input, L);
// TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
// for the simplest case just support constants.
if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
}
}
// Okay, this is an expression that we cannot symbolically evaluate
// into a SCEV. Check to see if it's possible to symbolically evaluate
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
SmallVector<Constant *, 4> Operands;
bool MadeImprovement = false;
for (Value *Op : I->operands()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
continue;
}
// If any operand is non-constant and of a non-integer, non-pointer type,
// don't even try to analyze it with SCEV techniques.
if (!isSCEVable(Op->getType()))
return V;
const SCEV *OrigV = getSCEV(Op);
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
Op->getType(),
false),
C, Op->getType());
Operands.push_back(C);
}
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
const DataLayout &DL = getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
Operands[1], DL, &TLI);
else if (const LoadInst *Load = dyn_cast<LoadInst>(I)) {
if (!Load->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], Load->getType(),
DL);
} else
C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
if (!C) return V;
return getSCEV(C);
}
}
}
// This is some other type of SCEVUnknown, just return it.
return V;
}
if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
Comm->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i) {
OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
NewOps.push_back(OpAtScope);
}
if (isa<SCEVAddExpr>(Comm))
return getAddExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMulExpr>(Comm))
return getMulExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMinMaxExpr>(Comm))
return getMinMaxExpr(Comm->getSCEVType(), NewOps);
llvm_unreachable("Unknown commutative SCEV type!");
}
}
// If we got here, all operands are loop invariant.
return Comm;
}
if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
if (LHS == Div->getLHS() && RHS == Div->getRHS())
return Div; // must be loop invariant
return getUDivExpr(LHS, RHS);
}
// If this is a loop recurrence for a loop that does not contain L, then we
// are dealing with the final value computed by the loop.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
// First, attempt to evaluate each operand.
// Avoid performing the look-up in the common case where the specified
// expression has no loop-variant portions.
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
if (OpAtScope == AddRec->getOperand(i))
continue;
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
AddRec->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i)
NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
const SCEV *FoldedRec =
getAddRecExpr(NewOps, AddRec->getLoop(),
AddRec->getNoWrapFlags(SCEV::FlagNW));
AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
// The addrec may be folded to a nonrecurrence, for example, if the
// induction variable is multiplied by zero after constant folding. Go
// ahead and return the folded value.
if (!AddRec)
return FoldedRec;
break;
}
// If the scope is outside the addrec's loop, evaluate it by using the
// loop exit value of the addrec.
if (!AddRec->getLoop()->contains(L)) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
// Then, evaluate the AddRec.
return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
}
return AddRec;
}
if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getZeroExtendExpr(Op, Cast->getType());
}
if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getSignExtendExpr(Op, Cast->getType());
}
if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getTruncateExpr(Op, Cast->getType());
}
if (const SCEVPtrToIntExpr *Cast = dyn_cast<SCEVPtrToIntExpr>(V)) {
const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
if (Op == Cast->getOperand())
return Cast; // must be loop invariant
return getPtrToIntExpr(Op, Cast->getType());
}
llvm_unreachable("Unknown SCEV type!");
}
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
return getSCEVAtScope(getSCEV(V), L);
}
const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
return stripInjectiveFunctions(ZExt->getOperand());
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
return stripInjectiveFunctions(SExt->getOperand());
return S;
}
/// Finds the minimum unsigned root of the following equation:
///
/// A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
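///
/// As a concrete illustration of the steps below: with BW = 8 (so N = 256),
/// A = 6 and B = 10, we have D = gcd(6, 256) = 2 and B is divisible by D;
/// the multiplicative inverse of A/D = 3 modulo N/D = 128 is 43 (since
/// 3 * 43 = 129 = 1 (mod 128)); and the minimum unsigned root is
/// (43 * 10 mod 256) / 2 = 174 / 2 = 87. Indeed, 6 * 87 = 522 = 10 (mod 256).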
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
assert(BW == SE.getTypeSizeInBits(B->getType()));
assert(A != 0 && "A must be non-zero.");
// 1. D = gcd(A, N)
//
// The gcd of A and N may have only one prime factor: 2. The number of
// trailing zeros in A is its multiplicity
uint32_t Mult2 = A.countTrailingZeros();
// D = 2^Mult2
// 2. Check if B is divisible by D.
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than multiplicity of this prime factor for D.
if (SE.GetMinTrailingZeros(B) < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
// modulo (N / D).
//
// If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
// (N / D) in general. The inverse itself always fits into BW bits, though,
// so we immediately truncate it.
APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
APInt Mod(BW + 1, 0);
Mod.setBit(BW - Mult2); // Mod = N / D
APInt I = AD.multiplicativeInverse(Mod).trunc(BW);
// 4. Compute the minimum unsigned root of the equation:
// I * (B / D) mod (N / D)
// To simplify the computation, we factor out the divide by D:
// (I * B mod N) / D
const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
/// For a given quadratic addrec, generate coefficients of the corresponding
/// quadratic equation, multiplied by a common value to ensure that they are
/// integers.
/// The returned value is a tuple { A, B, C, M, BitWidth }, where
/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
/// were multiplied by, and BitWidth is the bit width of the original addrec
/// coefficients.
/// This function returns None if the addrec coefficients are not compile-
/// time constants.
static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
<< *AddRec << '\n');
// We currently can only solve this if the coefficients are constants.
if (!LC || !MC || !NC) {
LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
return None;
}
APInt L = LC->getAPInt();
APInt M = MC->getAPInt();
APInt N = NC->getAPInt();
assert(!N.isNullValue() && "This is not a quadratic addrec");
unsigned BitWidth = LC->getAPInt().getBitWidth();
unsigned NewWidth = BitWidth + 1;
LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
<< BitWidth << '\n');
// The sign-extension (as opposed to a zero-extension) here matches the
// extension used in SolveQuadraticEquationWrap (with the same motivation).
N = N.sext(NewWidth);
M = M.sext(NewWidth);
L = L.sext(NewWidth);
// The increments are M, M+N, M+2N, ..., so the accumulated values are
// L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
// L+M, L+2M+N, L+3M+3N, ...
// After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
//
// The equation Acc = 0 is then
// L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
// In a quadratic form it becomes:
// N n^2 + (2M-N) n + 2L = 0.
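//
// As a concrete check of this derivation: for the addrec {6,+,-4,+,2}
// (L = 6, M = -4, N = 2), the accumulated value is
// Acc(n) = 6 - 4n + n(n-1) = n^2 - 5n + 6, and the quadratic form above is
// 2n^2 - 10n + 12, i.e. the same polynomial multiplied by T = 2.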
APInt A = N;
APInt B = 2 * M - A;
APInt C = 2 * L;
APInt T = APInt(NewWidth, 2);
LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
<< "x + " << C << ", coeff bw: " << NewWidth
<< ", multiplied by " << T << '\n');
return std::make_tuple(A, B, C, T, BitWidth);
}
/// Helper function to compare optional APInts:
/// (a) if X and Y both exist, return min(X, Y),
/// (b) if neither X nor Y exist, return None,
/// (c) if exactly one of X and Y exists, return that value.
static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
if (X.hasValue() && Y.hasValue()) {
unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
APInt XW = X->sextOrSelf(W);
APInt YW = Y->sextOrSelf(W);
return XW.slt(YW) ? *X : *Y;
}
if (!X.hasValue() && !Y.hasValue())
return None;
return X.hasValue() ? *X : *Y;
}
/// Helper function to truncate an optional APInt to a given BitWidth.
/// When solving addrec-related equations, it is preferable to return a value
/// that has the same bit width as the original addrec's coefficients. If the
/// solution fits in the original bit width, truncate it (except for i1).
/// Returning a value of a different bit width may inhibit some optimizations.
///
/// In general, a solution to a quadratic equation generated from an addrec
/// may require BW+1 bits, where BW is the bit width of the addrec's
/// coefficients. The reason is that the coefficients of the quadratic
/// equation are BW+1 bits wide (to avoid truncation when converting from
/// the addrec to the equation).
static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
if (!X.hasValue())
return None;
unsigned W = X->getBitWidth();
if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
return X->trunc(BitWidth);
return X;
}
/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
/// iterations. The values L, M, N are assumed to be signed, and they
/// should all have the same bit widths.
/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
/// where BW is the bit width of the addrec's coefficients.
/// If the calculated value is a BW-bit integer (for BW > 1), it will be
/// returned as such, otherwise the bit width of the returned value may
/// be greater than BW.
///
/// This function returns None if
/// (a) the addrec coefficients are not constant, or
/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
/// like x^2 = 5, no integer solutions exist, in other cases an integer
/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
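///
/// For example, for the chrec {6,+,-4,+,2} we have
/// c(n) = 6 - 4n + n(n-1) = (n - 2)(n - 3), so the least n >= 0 with
/// c(n) = 0 is n = 2; the candidate root returned by the solver is verified
/// against the chrec below before it is accepted.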
static Optional<APInt>
SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
if (!T.hasValue())
return None;
std::tie(A, B, C, M, BitWidth) = *T;
LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
if (!X.hasValue())
return None;
ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
if (!V->isZero())
return None;
return TruncIfPossible(X, BitWidth);
}
/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
/// iterations. The values M, N are assumed to be signed, and they
/// should all have the same bit widths.
/// Find the least n such that c(n) does not belong to the given range,
/// while c(n-1) does.
///
/// This function returns None if
/// (a) the addrec coefficients are not constant, or
/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
/// bounds of the range.
static Optional<APInt>
SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
const ConstantRange &Range, ScalarEvolution &SE) {
assert(AddRec->getOperand(0)->isZero() &&
"Starting value of addrec should be 0");
LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
<< Range << ", addrec " << *AddRec << '\n');
// This case is handled in getNumIterationsInRange. Here we can assume that
// we start in the range.
assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
"Addrec's initial value should be in range");
APInt A, B, C, M;
unsigned BitWidth;
auto T = GetQuadraticEquation(AddRec);
if (!T.hasValue())
return None;
// Be careful about the return value: there can be two reasons for not
// returning an actual number. First, if no solutions to the equations
// were found, and second, if the solutions don't leave the given range.
// The first case means that the actual solution is "unknown", the second
// means that it's known, but not valid. If the solution is unknown, we
// cannot make any conclusions.
// Return a pair: the optional solution and a flag indicating if the
// solution was found.
auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
// Solve for signed overflow and unsigned overflow, pick the lower
// solution.
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
<< Bound << " (before multiplying by " << M << ")\n");
Bound *= M; // The quadratic equation multiplier.
Optional<APInt> SO = None;
if (BitWidth > 1) {
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
"signed overflow\n");
SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
}
LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
"unsigned overflow\n");
Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
BitWidth+1);
auto LeavesRange = [&] (const APInt &X) {
ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
if (Range.contains(V0->getValue()))
return false;
// X should be at least 1, so X-1 is non-negative.
ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
if (Range.contains(V1->getValue()))
return true;
return false;
};
// If SolveQuadraticEquationWrap returns None, it means that there can
// be a solution, but the function failed to find it. We cannot treat it
// as "no solution".
if (!SO.hasValue() || !UO.hasValue())
return { None, false };
// Check the smaller value first to see if it leaves the range.
// At this point, both SO and UO must have values.
Optional<APInt> Min = MinOptional(SO, UO);
if (LeavesRange(*Min))
return { Min, true };
Optional<APInt> Max = Min == SO ? UO : SO;
if (LeavesRange(*Max))
return { Max, true };
// Solutions were found, but were eliminated, hence the "true".
return { None, true };
};
std::tie(A, B, C, M, BitWidth) = *T;
// Lower bound is inclusive, subtract 1 to represent the exiting value.
APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
auto SL = SolveForBoundary(Lower);
auto SU = SolveForBoundary(Upper);
// If any of the solutions was unknown, no meaningful conclusions can
// be made.
if (!SL.second || !SU.second)
return None;
// Claim: The correct solution is not some value between Min and Max.
//
// Justification: Assuming that Min and Max are different values, one of
// them is when the first signed overflow happens, the other is when the
// first unsigned overflow happens. Crossing the range boundary is only
// possible via an overflow (treating 0 as a special case of it, modeling
// an overflow as crossing k*2^W for some k).
//
// The interesting case here is when Min was eliminated as an invalid
// solution, but Max was not. The argument is that if there was another
// overflow between Min and Max, it would also have been eliminated if
// it was considered.
//
// For a given boundary, it is possible to have two overflows of the same
// type (signed/unsigned) without having the other type in between: this
// can happen when the vertex of the parabola is between the iterations
// corresponding to the overflows. This is only possible when the two
// overflows cross k*2^W for the same k. In such case, if the second one
// left the range (and was the first one to do so), the first overflow
// would have to enter the range, which would mean that either we had left
// the range before or that we started outside of it. Both of these cases
// are contradictions.
//
// Claim: In the case where SolveForBoundary returns None, the correct
// solution is not some value between the Max for this boundary and the
// Min of the other boundary.
//
// Justification: Assume that we had such Max_A and Min_B corresponding
// to range boundaries A and B and such that Max_A < Min_B. If there was
// a solution between Max_A and Min_B, it would have to be caused by an
// overflow corresponding to either A or B. It cannot correspond to B,
// since Min_B is the first occurrence of such an overflow. If it
// corresponded to A, it would have to be either a signed or an unsigned
// overflow that is larger than both eliminated overflows for A. But
// between the eliminated overflows and this overflow, the values would
// cover the entire value space, thus crossing the other boundary, which
// is a contradiction.
return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
bool AllowPredicates) {
// This is only used for loops with an "x != y" exit test. The exit condition
// is now expressed as a single expression, V = x-y. So the exit test is
// effectively V != 0. We know, and take advantage of the fact, that this
// expression is only ever used in a comparison-against-zero context.
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
if (C->getValue()->isZero()) return C;
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
const SCEVAddRecExpr *AddRec =
dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));
if (!AddRec && AllowPredicates)
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
}
// Otherwise we can only handle this if it is affine.
if (!AddRec->isAffine())
return getCouldNotCompute();
// If this is an affine expression, the execution count of this branch is
// the minimum unsigned root of the following equation:
//
// Start + Step*N = 0 (mod 2^BW)
//
// equivalent to:
//
// Step*N = -Start (mod 2^BW)
//
// where BW is the common bit width of Start and Step.
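//
// For instance, in 8-bit arithmetic with Start = 8 and Step = -2, the
// equation -2*N = -8 (mod 256) has minimum unsigned solution N = 4, which is
// exactly the number of backedges taken before the value reaches zero.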
// Get the initial value for the loop.
const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
// For now we handle only constant steps.
//
// TODO: Handle a nonconstant Step given AddRec<NUW>. If the
// AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
if (!StepC || StepC->getValue()->isZero())
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
// N = -Start/Step (as unsigned)
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
bool CountDown = StepC->getAPInt().isNegative();
const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wrap around.
// 1*N = -Start; -1*N = Start (mod 2^BW), so:
// N = Distance (as unsigned)
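// E.g. in 8-bit arithmetic, Step = 1 and Start = 251 gives
// Distance = -251 = 5 (mod 256); the value visits 251, 252, ..., 255 and
// then hits 0, so exactly 5 backedges are taken.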
if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
APInt MaxBECountBase = getUnsignedRangeMax(Distance);
if (MaxBECountBase.ult(MaxBECount))
MaxBECount = MaxBECountBase;
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
// case, and see if we can improve the bound.
//
// Explicitly handling this here is necessary because getUnsignedRange
// isn't context-sensitive; it doesn't know that we only care about the
// range inside the loop.
const SCEV *Zero = getZero(Distance->getType());
const SCEV *One = getOne(Distance->getType());
const SCEV *DistancePlusOne = getAddExpr(Distance, One);
if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
// If Distance + 1 doesn't overflow, we can compute the maximum distance
// as "unsigned_max(Distance + 1) - 1".
ConstantRange CR = getUnsignedRange(DistancePlusOne);
MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
}
return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
}
// If the condition controls loop exit (the loop exits only if the expression
// is true) and the addition is no-wrap we can use unsigned divide to
// compute the backedge count. In this case, the step may not divide the
// distance, but we don't care because if the condition is "missed" the loop
// will have undefined behavior due to wrapping.
if (ControlsExit && AddRec->hasNoSelfWrap() &&
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
const SCEV *Max = getCouldNotCompute();
if (Exact != getCouldNotCompute()) {
APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L));
APInt BaseMaxInt = getUnsignedRangeMax(Exact);
if (BaseMaxInt.ult(MaxInt))
Max = getConstant(BaseMaxInt);
else
Max = getConstant(MaxInt);
}
return ExitLimit(Exact, Max, false, Predicates);
}
// Solve the general equation.
const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
getNegativeSCEV(Start), *this);
const SCEV *M = E == getCouldNotCompute()
? E
: getConstant(getUnsignedRangeMax(E));
return ExitLimit(E, M, false, Predicates);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
// If the value is a constant, check to see if it is known to be non-zero
// already. If so, the backedge will execute zero times.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
if (!C->getValue()->isZero())
return getZero(C->getType());
return getCouldNotCompute(); // Otherwise it will loop infinitely.
}
// We could implement others, but I really doubt anyone writes loops like
// this, and if they did, they would already be constant folded.
return getCouldNotCompute();
}
std::pair<const BasicBlock *, const BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
const {
// If the block has a unique predecessor, then there is no path from the
// predecessor to the block that does not go through the direct edge
// from the predecessor to the block.
if (const BasicBlock *Pred = BB->getSinglePredecessor())
return {Pred, BB};
// A loop's header is defined to be a block that dominates the loop.
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};
return {nullptr, nullptr};
}
/// SCEV structural equivalence is usually sufficient for testing whether two
/// expressions are equal; however, for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
static bool HasSameValue(const SCEV *A, const SCEV *B) {
// Quick check to see if they are the same SCEV.
if (A == B) return true;
auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) {
// Not all instructions that are "identical" compute the same value. For
// instance, two distinct alloca instructions allocating the same type are
// identical and do not read memory, yet they compute distinct values.
return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A));
};
// Otherwise, if they're both SCEVUnknown, it's possible that they hold
// two different instructions with the same value. Check for this case.
if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
if (ComputesEqualValues(AI, BI))
return true;
// Otherwise assume they may have a different value.
return false;
}
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
// '0 != 0'.
auto TrivialCase = [&](bool TriviallyTrue) {
LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
return true;
};
// If we hit the max recursion limit, bail out.
if (Depth >= 3)
return false;
// Canonicalize a constant to the right side.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
// Check for both operands constant.
if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
return TrivialCase(false);
else
return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
// If we're comparing an addrec with a value which is loop-invariant in the
// addrec's loop, put the addrec on the left. Also make a dominance check,
// as both operands could be addrecs loop-invariant in each other's loop.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
const Loop *L = AR->getLoop();
if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
}
}
// If there's a constant operand, canonicalize comparisons with boundary
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getAPInt();
bool SimplifiedByConstantRange = false;
if (!ICmpInst::isEquality(Pred)) {
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
return TrivialCase(true);
else if (ExactCR.isEmptySet())
return TrivialCase(false);
APInt NewRHS;
CmpInst::Predicate NewPred;
if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
ICmpInst::isEquality(NewPred)) {
// We were able to convert an inequality to an equality.
Pred = NewPred;
RHS = getConstant(NewRHS);
Changed = SimplifiedByConstantRange = true;
}
}
if (!SimplifiedByConstantRange) {
switch (Pred) {
default:
break;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
// Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
if (!RA)
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
if (const SCEVMulExpr *ME =
dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
ME->getOperand(0)->isAllOnesValue()) {
RHS = AE->getOperand(1);
LHS = ME->getOperand(1);
Changed = true;
}
break;
// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
// check to a trivial case.
case ICmpInst::ICMP_UGE:
assert(!RA.isMinValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_UGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_ULE:
assert(!RA.isMaxValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
case ICmpInst::ICMP_SGE:
assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
case ICmpInst::ICMP_SLE:
assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
}
}
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
return TrivialCase(true);
if (ICmpInst::isFalseWhenEqual(Pred))
return TrivialCase(false);
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
// adding or subtracting 1 from one of the operands.
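// For example, "X s<= Y" becomes "X s< Y+1" whenever the signed range of Y is
// known not to include SINT_MAX (so Y+1 cannot wrap); the remaining cases
// below guard against the analogous boundary values.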
switch (Pred) {
case ICmpInst::ICMP_SLE:
if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
} else if (!getSignedRangeMin(LHS).isMinSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
Changed = true;
}
break;
case ICmpInst::ICMP_SGE:
if (!getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
} else if (!getSignedRangeMax(LHS).isMaxSignedValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
Changed = true;
}
break;
case ICmpInst::ICMP_ULE:
if (!getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
} else if (!getUnsignedRangeMin(LHS).isMinValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);
Pred = ICmpInst::ICMP_ULT;
Changed = true;
}
break;
case ICmpInst::ICMP_UGE:
if (!getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
} else if (!getUnsignedRangeMax(LHS).isMaxValue()) {
LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
}
break;
default:
break;
}
// TODO: More simplifications are possible here.
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
return getSignedRangeMax(S).isNegative();
}
bool ScalarEvolution::isKnownPositive(const SCEV *S) {
return getSignedRangeMin(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
return !getSignedRangeMin(S).isNegative();
}
bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
return !getSignedRangeMax(S).isStrictlyPositive();
}
bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
return getUnsignedRangeMin(S) != 0;
}
std::pair<const SCEV *, const SCEV *>
ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
// Compute SCEV on entry of loop L.
const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this);
if (Start == getCouldNotCompute())
return { Start, Start };
// Compute post increment SCEV for loop L.
const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this);
assert(PostInc != getCouldNotCompute() && "Unexpected could not compute");
return { Start, PostInc };
}
bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// First collect all loops.
SmallPtrSet<const Loop *, 8> LoopsUsed;
getUsedLoops(LHS, LoopsUsed);
getUsedLoops(RHS, LoopsUsed);
if (LoopsUsed.empty())
return false;
// Domination relationship must be a linear order on collected loops.
#ifndef NDEBUG
for (auto *L1 : LoopsUsed)
for (auto *L2 : LoopsUsed)
assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
DT.dominates(L2->getHeader(), L1->getHeader())) &&
"Domination relationship is not a linear order");
#endif
const Loop *MDL =
*std::max_element(LoopsUsed.begin(), LoopsUsed.end(),
[&](const Loop *L1, const Loop *L2) {
return DT.properlyDominates(L1->getHeader(), L2->getHeader());
});
// Get init and post increment value for LHS.
auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS);
// If LHS contains an unknown non-invariant SCEV, bail out.
if (SplitLHS.first == getCouldNotCompute())
return false;
assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC");
// Get init and post increment value for RHS.
auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS);
// If RHS contains an unknown non-invariant SCEV, bail out.
if (SplitRHS.first == getCouldNotCompute())
return false;
assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC");
// It is possible that the init SCEV contains an invariant load that does
// not dominate MDL and is not available at the MDL loop entry, so we must
// check for that here.
if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) ||
!isAvailableAtLoopEntry(SplitRHS.first, MDL))
return false;
// The backedge guard check appears to be faster than the entry check, so
// doing it first can short-circuit and speed up the whole estimation in
// some cases.
return isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second,
SplitRHS.second) &&
isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first);
}
bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Canonicalize the inputs first.
(void)SimplifyICmpOperands(Pred, LHS, RHS);
if (isKnownViaInduction(Pred, LHS, RHS))
return true;
if (isKnownPredicateViaSplitting(Pred, LHS, RHS))
return true;
// Otherwise see what can be done with some simple reasoning.
return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
}
Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (isKnownPredicate(Pred, LHS, RHS))
return true;
else if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS))
return false;
return None;
}
bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const Instruction *Context) {
// TODO: Analyze guards and assumes from Context's block.
return isKnownPredicate(Pred, LHS, RHS) ||
isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS);
}
Optional<bool>
ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Instruction *Context) {
Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS);
if (KnownWithoutContext)
return KnownWithoutContext;
if (isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS))
return true;
else if (isBasicBlockEntryGuardedByCond(Context->getParent(),
ICmpInst::getInversePredicate(Pred),
LHS, RHS))
return false;
return None;
}
bool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred,
const SCEVAddRecExpr *LHS,
const SCEV *RHS) {
const Loop *L = LHS->getLoop();
return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) &&
isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS);
}
Optional<ScalarEvolution::MonotonicPredicateType>
ScalarEvolution::getMonotonicPredicateType(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred) {
auto Result = getMonotonicPredicateTypeImpl(LHS, Pred);
#ifndef NDEBUG
// Verify an invariant: inverting the predicate should turn a monotonically
// increasing change to a monotonically decreasing one, and vice versa.
if (Result) {
auto ResultSwapped =
getMonotonicPredicateTypeImpl(LHS, ICmpInst::getSwappedPredicate(Pred));
assert(ResultSwapped.hasValue() && "should be able to analyze both!");
assert(ResultSwapped.getValue() != Result.getValue() &&
"monotonicity should flip as we flip the predicate");
}
#endif
return Result;
}
Optional<ScalarEvolution::MonotonicPredicateType>
ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred) {
// A zero step value for LHS means the induction variable is essentially a
// loop invariant value. We don't really depend on the predicate actually
// flipping from false to true (for increasing predicates, and the other way
// around for decreasing predicates); all we care about is that *if* the
// predicate changes, then it only changes from false to true.
//
// A zero step value in itself is not very useful, but there may be places
// where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
// as general as possible.
// Only handle LE/LT/GE/GT predicates.
if (!ICmpInst::isRelational(Pred))
return None;
bool IsGreater = ICmpInst::isGE(Pred) || ICmpInst::isGT(Pred);
assert((IsGreater || ICmpInst::isLE(Pred) || ICmpInst::isLT(Pred)) &&
"Should be greater or less!");
// Check that AR does not wrap.
if (ICmpInst::isUnsigned(Pred)) {
if (!LHS->hasNoUnsignedWrap())
return None;
return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
} else {
assert(ICmpInst::isSigned(Pred) &&
"Relational predicate is either signed or unsigned!");
if (!LHS->hasNoSignedWrap())
return None;
const SCEV *Step = LHS->getStepRecurrence(*this);
if (isKnownNonNegative(Step))
return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
if (isKnownNonPositive(Step))
return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
return None;
}
}
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const Loop *L) {
// If there is a loop-invariant, force it into the RHS, otherwise bail out.
if (!isLoopInvariant(RHS, L)) {
if (!isLoopInvariant(LHS, L))
return None;
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
if (!ArLHS || ArLHS->getLoop() != L)
return None;
auto MonotonicType = getMonotonicPredicateType(ArLHS, Pred);
if (!MonotonicType)
return None;
// If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
// true as the loop iterates, and the backedge is control dependent on
// "ArLHS `Pred` RHS" == true then we can reason as follows:
//
// * if the predicate was false in the first iteration then the predicate
// is never evaluated again, since the loop exits without taking the
// backedge.
// * if the predicate was true in the first iteration then it will
// continue to be true for all future iterations since it is
// monotonically increasing.
//
// For both the above possibilities, we can replace the loop varying
// predicate with its value on the first iteration of the loop (which is
// loop invariant).
//
// A similar reasoning applies for a monotonically decreasing predicate, by
// replacing true with false and false with true in the above two bullets.
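//
// As a concrete instance: for ArLHS = {1,+,1}<nsw> and the predicate
// "s> 0", the predicate is monotonically increasing (it can only go from
// false to true as the IV grows), so if every backedge-taking iteration
// satisfies "{1,+,1} s> 0", the loop-variant check may be replaced by its
// value at the start, "1 s> 0", which is loop invariant.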
bool Increasing = *MonotonicType == ScalarEvolution::MonotonicallyIncreasing;
auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);
if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
return None;
return ScalarEvolution::LoopInvariantPredicate(Pred, ArLHS->getStart(), RHS);
}
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
const Instruction *Context, const SCEV *MaxIter) {
// Try to prove the following set of facts:
// - The predicate is monotonic in the iteration space.
// - If the check does not fail on the 1st iteration:
// - No overflow will happen during first MaxIter iterations;
// - It will not fail on the MaxIter'th iteration.
// If the check does fail on the 1st iteration, we leave the loop and no
// other checks matter.
// If there is a loop-invariant, force it into the RHS, otherwise bail out.
if (!isLoopInvariant(RHS, L)) {
if (!isLoopInvariant(LHS, L))
return None;
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
if (!AR || AR->getLoop() != L)
return None;
// The predicate must be relational (i.e. <, <=, >=, >).
if (!ICmpInst::isRelational(Pred))
return None;
// TODO: Support steps other than +/- 1.
const SCEV *Step = AR->getStepRecurrence(*this);
auto *One = getOne(Step->getType());
auto *MinusOne = getNegativeSCEV(One);
if (Step != One && Step != MinusOne)
return None;
// A type mismatch here means that MaxIter is potentially larger than the max
// unsigned value of the start type, which means we cannot prove no-wrap for
// the indvar.
if (AR->getType() != MaxIter->getType())
return None;
// Value of IV on suggested last iteration.
const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
// Does it still meet the requirement?
if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS))
return None;
// Because the step is +/- 1 and MaxIter has the same type as Start (i.e. it
// does not exceed the max unsigned value of this type), this effectively proves
// that there is no wrap during the iteration. To prove that there is no
// signed/unsigned wrap, we need to check that
// Start <= Last for step = 1 or Start >= Last for step = -1.
ICmpInst::Predicate NoOverflowPred =
CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
if (Step == MinusOne)
NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
const SCEV *Start = AR->getStart();
if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
return None;
// Everything is fine.
return ScalarEvolution::LoopInvariantPredicate(Pred, Start, RHS);
}
bool ScalarEvolution::isKnownPredicateViaConstantRanges(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
if (HasSameValue(LHS, RHS))
return ICmpInst::isTrueWhenEqual(Pred);
// This code is split out from isKnownPredicate because it is called from
// within isLoopEntryGuardedByCond.
auto CheckRanges = [&](const ConstantRange &RangeLHS,
const ConstantRange &RangeRHS) {
return RangeLHS.icmp(Pred, RangeRHS);
};
// The check at the top of the function catches the case where the values are
// known to be equal.
if (Pred == CmpInst::ICMP_EQ)
return false;
if (Pred == CmpInst::ICMP_NE) {
if (CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)))
return true;
auto *Diff = getMinusSCEV(LHS, RHS);
return !isa<SCEVCouldNotCompute>(Diff) && isKnownNonZero(Diff);
}
if (CmpInst::isSigned(Pred))
return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));
return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
}
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
// Match X to (A + C1)<ExpectedFlags> and Y to (A + C2)<ExpectedFlags>, where
// C1 and C2 are constant integers. If either X or Y is not an add expression,
// it is treated as X + 0 or Y + 0 respectively. C1 and C2 are returned via
// OutC1 and OutC2.
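//
// For example, X = (%a + 3)<nsw> and Y = (%a + 7)<nsw> match with C1 = 3 and
// C2 = 7; the ICMP_SLE case below then concludes X s<= Y because 3 s<= 7 and
// neither add can wrap in the signed sense.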
auto MatchBinaryAddToConst = [this](const SCEV *X, const SCEV *Y,
APInt &OutC1, APInt &OutC2,
SCEV::NoWrapFlags ExpectedFlags) {
const SCEV *XNonConstOp, *XConstOp;
const SCEV *YNonConstOp, *YConstOp;
SCEV::NoWrapFlags XFlagsPresent;
SCEV::NoWrapFlags YFlagsPresent;
if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent)) {
XConstOp = getZero(X->getType());
XNonConstOp = X;
XFlagsPresent = ExpectedFlags;
}
if (!isa<SCEVConstant>(XConstOp) ||
(XFlagsPresent & ExpectedFlags) != ExpectedFlags)
return false;
if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent)) {
YConstOp = getZero(Y->getType());
YNonConstOp = Y;
YFlagsPresent = ExpectedFlags;
}
if (!isa<SCEVConstant>(YConstOp) ||
(YFlagsPresent & ExpectedFlags) != ExpectedFlags)
return false;
if (YNonConstOp != XNonConstOp)
return false;
OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
return true;
};
APInt C1;
APInt C2;
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
// (X + C1)<nsw> s<= (X + C2)<nsw> if C1 s<= C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.sle(C2))
return true;
break;
case ICmpInst::ICMP_SGT:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
// (X + C1)<nsw> s< (X + C2)<nsw> if C1 s< C2.
if (MatchBinaryAddToConst(LHS, RHS, C1, C2, SCEV::FlagNSW) && C1.slt(C2))
return true;
break;
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
// (X + C1)<nuw> u<= (X + C2)<nuw> for C1 u<= C2.
if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ule(C2))
return true;
break;
case ICmpInst::ICMP_UGT:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULT:
// (X + C1)<nuw> u< (X + C2)<nuw> if C1 u< C2.
if (MatchBinaryAddToConst(RHS, LHS, C2, C1, SCEV::FlagNUW) && C1.ult(C2))
return true;
break;
}
return false;
}
bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
return false;
// Allowing an arbitrary number of activations of isKnownPredicateViaSplitting
// on the stack can result in exponential time complexity.
SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);
// If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
//
// To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
// isKnownPredicate. isKnownPredicate is more powerful, but also more
// expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
// interesting cases seen in practice. We can consider "upgrading" L >= 0 to
// use isKnownPredicate later if needed.
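//
// The equivalence holds because a non-negative L is u< 2^(BW-1), so any I
// with its sign bit set is u>= 2^(BW-1) u> L and thus never u< L; hence
// "I u< L" forces I >= 0, and on non-negative values u< and s< coincide.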
return isKnownNonNegative(RHS) &&
isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}
bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// No need to even try if we know the module has no guards.
if (!HasGuards)
return false;
return any_of(*BB, [&](const Instruction &I) {
using namespace llvm::PatternMatch;
Value *Condition;
return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
m_Value(Condition))) &&
isImpliedCond(Pred, LHS, RHS, Condition, false);
});
}
/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS. This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L) return true;
if (VerifyIR)
assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
isImpliedCond(Pred, LHS, RHS,
LoopContinuePredicate->getCondition(),
LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
return true;
// We don't want more than one activation of the following loops on the stack
// -- that can lead to O(n!) time complexity.
if (WalkingBEDominatingConds)
return false;
SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
// See if we can exploit a trip count to prove the predicate.
const auto &BETakenInfo = getBackedgeTakenInfo(L);
const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
if (LatchBECount != getCouldNotCompute()) {
// We know that Latch branches back to the loop header exactly
// LatchBECount times. This means the backedge condition at Latch is
// equivalent to "{0,+,1} u< LatchBECount".
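// For example, if LatchBECount is 10, then on every iteration that takes the
// backedge the canonical counter {0,+,1} lies in [0, 10), so proving
// "LHS Pred RHS" from "{0,+,1} u< 10" (which is what the isImpliedCond call
// below attempts) establishes the predicate on every backedge.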
Type *Ty = LatchBECount->getType();
auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
const SCEV *LoopCounter =
getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
LatchBECount))
return true;
}
// Check conditions due to any @llvm.assume intrinsics.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
if (!DT.dominates(CI, Latch->getTerminator()))
continue;
if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
return true;
}
// If the loop is not reachable from the entry block, we risk running into an
// infinite loop as we walk up into the dom tree. These loops do not matter
// anyway, so we just return a conservative answer when we see them.
if (!DT.isReachableFromEntry(L->getHeader()))
return false;
if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
return true;
for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
DTN != HeaderDTN; DTN = DTN->getIDom()) {
assert(DTN && "should reach the loop header before reaching the root!");
BasicBlock *BB = DTN->getBlock();
if (isImpliedViaGuard(BB, Pred, LHS, RHS))
return true;
BasicBlock *PBB = BB->getSinglePredecessor();
if (!PBB)
continue;
BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
if (!ContinuePredicate || !ContinuePredicate->isConditional())
continue;
Value *Condition = ContinuePredicate->getCondition();
// If we have an edge `E` within the loop body that dominates the only
// latch, the condition guarding `E` also guards the backedge. This
// reasoning works only for loops with a single latch.
BasicBlockEdge DominatingEdge(PBB, BB);
if (DominatingEdge.isSingleEdge()) {
// We're constructively (and conservatively) enumerating edges within the
// loop body that dominate the latch. The dominator tree better agree
// with us on this:
assert(DT.dominates(DominatingEdge, Latch) && "should be!");
if (isImpliedCond(Pred, LHS, RHS, Condition,
BB != ContinuePredicate->getSuccessor(0)))
return true;
}
}
return false;
}
bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
if (VerifyIR)
assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
// If we cannot prove strict comparison (e.g. a > b), maybe we can prove
// the facts (a >= b && a != b) separately. A typical situation is when the
// non-strict comparison is known from ranges and non-equality is known from
// dominating predicates. If we are proving strict comparison, we always try
// to prove non-equality and non-strict comparison separately.
auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred);
const bool ProvingStrictComparison = (Pred != NonStrictPredicate);
bool ProvedNonStrictComparison = false;
bool ProvedNonEquality = false;
auto SplitAndProve =
[&](std::function<bool(ICmpInst::Predicate)> Fn) -> bool {
if (!ProvedNonStrictComparison)
ProvedNonStrictComparison = Fn(NonStrictPredicate);
if (!ProvedNonEquality)
ProvedNonEquality = Fn(ICmpInst::ICMP_NE);
if (ProvedNonStrictComparison && ProvedNonEquality)
return true;
return false;
};
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isKnownViaNonRecursiveReasoning(P, LHS, RHS);
};
if (SplitAndProve(ProofFn))
return true;
}
// Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
auto ProveViaGuard = [&](const BasicBlock *Block) {
if (isImpliedViaGuard(Block, Pred, LHS, RHS))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isImpliedViaGuard(Block, P, LHS, RHS);
};
if (SplitAndProve(ProofFn))
return true;
}
return false;
};
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
const Instruction *Context = &BB->front();
if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
return isImpliedCond(P, LHS, RHS, Condition, Inverse, Context);
};
if (SplitAndProve(ProofFn))
return true;
}
return false;
};
// Starting at the block's predecessor, climb up the predecessor chain as long
// as we can find predecessors that have unique successors leading to the
// original block.
const Loop *ContainingLoop = LI.getLoopFor(BB);
const BasicBlock *PredBB;
if (ContainingLoop && ContainingLoop->getHeader() == BB)
PredBB = ContainingLoop->getLoopPredecessor();
else
PredBB = BB->getSinglePredecessor();
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(PredBB, BB);
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
if (ProveViaGuard(Pair.first))
return true;
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate ||
LoopEntryPredicate->isUnconditional())
continue;
if (ProveViaCond(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
// Check conditions due to any @llvm.assume intrinsics.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *CI = cast<CallInst>(AssumeVH);
if (!DT.dominates(CI, BB))
continue;
if (ProveViaCond(CI->getArgOperand(0), false))
return true;
}
return false;
}
bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
// (interprocedural conditions notwithstanding).
if (!L)
return false;
// Both LHS and RHS must be available at loop entry.
assert(isAvailableAtLoopEntry(LHS, L) &&
"LHS is not available at Loop Entry");
assert(isAvailableAtLoopEntry(RHS, L) &&
"RHS is not available at Loop Entry");
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
return isBasicBlockEntryGuardedByCond(L->getHeader(), Pred, LHS, RHS);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Value *FoundCondValue, bool Inverse,
const Instruction *Context) {
// A false condition implies anything, so do not bother analyzing it further.
if (FoundCondValue ==
ConstantInt::getBool(FoundCondValue->getContext(), Inverse))
return true;
if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
auto ClearOnExit =
make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
// Recursively handle And and Or conditions.
const Value *Op0, *Op1;
if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
if (!Inverse)
return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
} else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
if (Inverse)
return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
}
const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// We have found a conditional branch that dominates the loop or controls
// the loop latch. Check whether it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
if (Inverse)
FoundPred = ICI->getInversePredicate();
else
FoundPred = ICI->getPredicate();
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
const Instruction *Context) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
// For unsigned and equality predicates, try to prove that both found
// operands fit into narrow unsigned range. If so, try to prove facts in
// narrow types.
if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy()) {
auto *NarrowType = LHS->getType();
auto *WideType = FoundLHS->getType();
auto BitWidth = getTypeSizeInBits(NarrowType);
const SCEV *MaxValue = getZeroExtendExpr(
getConstant(APInt::getMaxValue(BitWidth)), WideType);
if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) &&
isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) {
const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
TruncFoundRHS, Context))
return true;
}
}
if (LHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(Pred)) {
LHS = getSignExtendExpr(LHS, FoundLHS->getType());
RHS = getSignExtendExpr(RHS, FoundLHS->getType());
} else {
LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
}
} else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
if (FoundLHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
} else {
FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
}
}
return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
FoundRHS, Context);
}
bool ScalarEvolution::isImpliedCondBalancedTypes(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
const Instruction *Context) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(FoundLHS->getType()) &&
"Types should be balanced!");
// Canonicalize the query to match the way instcombine will have
// canonicalized the comparison.
if (SimplifyICmpOperands(Pred, LHS, RHS))
if (LHS == RHS)
return CmpInst::isTrueWhenEqual(Pred);
if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
if (FoundLHS == FoundRHS)
return CmpInst::isFalseWhenEqual(FoundPred);
// Check to see if we can make the LHS or RHS match.
if (LHS == FoundRHS || RHS == FoundLHS) {
if (isa<SCEVConstant>(RHS)) {
std::swap(FoundLHS, FoundRHS);
FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
} else {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
}
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
// We can write the implication
// 0. LHS Pred RHS <- FoundLHS SwapPred FoundRHS
// using one of the following ways:
// 1. LHS Pred RHS <- FoundRHS Pred FoundLHS
// 2. RHS SwapPred LHS <- FoundLHS SwapPred FoundRHS
// 3. LHS Pred RHS <- ~FoundLHS Pred ~FoundRHS
// 4. ~LHS SwapPred ~RHS <- FoundLHS SwapPred FoundRHS
// Forms 1. and 2. require swapping the operands of one condition. Don't
// do this if it would break canonical constant/addrec ordering.
if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
Context);
if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context);
// Don't try to getNotSCEV pointers.
if (LHS->getType()->isPointerTy() || FoundLHS->getType()->isPointerTy())
return false;
// There's no clear preference between forms 3. and 4.; try both.
return isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
FoundLHS, FoundRHS, Context) ||
isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
getNotSCEV(FoundRHS), Context);
}
// Unsigned comparison is the same as signed comparison when both the operands
// are non-negative.
if (CmpInst::isUnsigned(FoundPred) &&
CmpInst::getSignedPredicate(FoundPred) == Pred &&
isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
(isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
const SCEVConstant *C = nullptr;
const SCEV *V = nullptr;
if (isa<SCEVConstant>(FoundLHS)) {
C = cast<SCEVConstant>(FoundLHS);
V = FoundRHS;
} else {
C = cast<SCEVConstant>(FoundRHS);
V = FoundLHS;
}
// The guarding predicate tells us that C != V. If the known range
// of V is [C, t), we can sharpen the range to [C + 1, t). The
// range we consider has to correspond to the same signedness as the
// predicate we're interested in folding.
APInt Min = ICmpInst::isSigned(Pred) ?
getSignedRangeMin(V) : getUnsignedRangeMin(V);
if (Min == C->getAPInt()) {
// Given (V >= Min && V != Min) we conclude V >= (Min + 1).
// This is true even if (Min + 1) wraps around -- in case of
// wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
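// For instance (purely illustrative numbers): if Pred is ICMP_UGE, the known
// unsigned range of V starts at Min = 7, and the guard gives V != 7, then
// V u>= 8 (= SharperMin), and it suffices to show V u>= 8 implies
// LHS u>= RHS.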
APInt SharperMin = Min + 1;
switch (Pred) {
case ICmpInst::ICMP_SGE:
case ICmpInst::ICMP_UGE:
// We know V `Pred` SharperMin. If this implies LHS `Pred`
// RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
Context))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT:
// We know from the range information that (V `Pred` Min ||
// V == Min). We know from the guarding condition that !(V
// == Min). This gives us
//
// V `Pred` Min || V == Min && !(V == Min)
// => V `Pred` Min
//
// If V `Pred` Min implies LHS `Pred` RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min),
Context))
return true;
break;
// `LHS < RHS` and `LHS <= RHS` are handled in the same way as `RHS > LHS`
// and `RHS >= LHS` respectively.
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULE:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
LHS, V, getConstant(SharperMin), Context))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
LHS, V, getConstant(Min), Context))
return true;
break;
default:
// No change
break;
}
}
}
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS,
Context))
return true;
// Otherwise assume the worst.
return false;
}
bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags) {
const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
if (!AE || AE->getNumOperands() != 2)
return false;
L = AE->getOperand(0);
R = AE->getOperand(1);
Flags = AE->getNoWrapFlags();
return true;
}
Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
const SCEV *Less) {
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
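// For illustration (hypothetical operands): given More = (10 + %x) and
// Less = (3 + %x), the checks below recognize the shared operand %x and
// return the constant difference 10 - 3 = 7 without forming a subtraction.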
// X - X = 0.
if (More == Less)
return APInt(getTypeSizeInBits(More->getType()), 0);
if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
const auto *LAR = cast<SCEVAddRecExpr>(Less);
const auto *MAR = cast<SCEVAddRecExpr>(More);
if (LAR->getLoop() != MAR->getLoop())
return None;
// We look at affine expressions only; not for correctness but to keep
// getStepRecurrence cheap.
if (!LAR->isAffine() || !MAR->isAffine())
return None;
if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
return None;
Less = LAR->getStart();
More = MAR->getStart();
// fall through
}
if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
const auto &M = cast<SCEVConstant>(More)->getAPInt();
const auto &L = cast<SCEVConstant>(Less)->getAPInt();
return M - L;
}
SCEV::NoWrapFlags Flags;
const SCEV *LLess = nullptr, *RLess = nullptr;
const SCEV *LMore = nullptr, *RMore = nullptr;
const SCEVConstant *C1 = nullptr, *C2 = nullptr;
// Compare (X + C1) vs X.
if (splitBinaryAdd(Less, LLess, RLess, Flags))
if ((C1 = dyn_cast<SCEVConstant>(LLess)))
if (RLess == More)
return -(C1->getAPInt());
// Compare X vs (X + C2).
if (splitBinaryAdd(More, LMore, RMore, Flags))
if ((C2 = dyn_cast<SCEVConstant>(LMore)))
if (RMore == Less)
return C2->getAPInt();
// Compare (X + C1) vs (X + C2).
if (C1 && C2 && RLess == RMore)
return C2->getAPInt() - C1->getAPInt();
return None;
}
bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) {
// Try to recognize the following pattern:
//
// FoundRHS = ...
// ...
// loop:
// FoundLHS = {Start,+,W}
// context_bb: // Basic block from the same loop
// known(Pred, FoundLHS, FoundRHS)
//
// If some predicate is known in the context of a loop, it is also known on
// each iteration of this loop, including the first iteration. Therefore, in
// this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
// prove the original pred using this fact.
if (!Context)
return false;
const BasicBlock *ContextBB = Context->getParent();
// Make sure AR varies in the context block.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
const Loop *L = AR->getLoop();
// Make sure that the context belongs to the loop and executes on the 1st
// iteration (if it ever executes at all).
if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
return false;
if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop()))
return false;
return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS);
}
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundRHS)) {
const Loop *L = AR->getLoop();
// Make sure that the context belongs to the loop and executes on the 1st
// iteration (if it ever executes at all).
if (!L->contains(ContextBB) || !DT.dominates(ContextBB, L->getLoopLatch()))
return false;
if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop()))
return false;
return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart());
}
return false;
}
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS, const SCEV *FoundRHS) {
if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
return false;
const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
if (!AddRecLHS)
return false;
const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
if (!AddRecFoundLHS)
return false;
// We'd like to let SCEV reason about control dependencies, so we constrain
// both the inequalities to be about add recurrences on the same loop. This
// way we can use isLoopEntryGuardedByCond later.
const Loop *L = AddRecFoundLHS->getLoop();
if (L != AddRecLHS->getLoop())
return false;
// FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
//
// FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
// ... (2)
//
// Informal proof for (2), assuming (1) [*]:
//
// We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
//
// Then
//
// FoundLHS s< FoundRHS s< INT_MIN - C
// <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ]
// <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ]
// <=> (FoundLHS + INT_MIN + C + INT_MIN) s<
// (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ]
// <=> FoundLHS + C s< FoundRHS + C
//
// [*]: (1) can be proved by ruling out overflow.
//
// [**]: This can be proved by analyzing all the four possibilities:
// (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
// (A s>= 0, B s>= 0).
//
// Note:
// Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
// will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS
// = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS
// s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is
// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
// C)".
Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
if (!LDiff || !RDiff || *LDiff != *RDiff)
return false;
if (LDiff->isMinValue())
return true;
APInt FoundRHSLimit;
if (Pred == CmpInst::ICMP_ULT) {
FoundRHSLimit = -(*RDiff);
} else {
assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
FoundRHSLimit =
    APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
}
// Try to prove (1) or (2), as needed.
return isAvailableAtLoopEntry(FoundRHS, L) &&
isLoopEntryGuardedByCond(L, Pred, FoundRHS,
getConstant(FoundRHSLimit));
}
bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS, unsigned Depth) {
const PHINode *LPhi = nullptr, *RPhi = nullptr;
auto ClearOnExit = make_scope_exit([&]() {
if (LPhi) {
bool Erased = PendingMerges.erase(LPhi);
assert(Erased && "Failed to erase LPhi!");
(void)Erased;
}
if (RPhi) {
bool Erased = PendingMerges.erase(RPhi);
assert(Erased && "Failed to erase RPhi!");
(void)Erased;
}
});
// Find the respective Phis and check that they are not already pending.
if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS))
if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) {
if (!PendingMerges.insert(Phi).second)
return false;
LPhi = Phi;
}
if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS))
if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) {
// If we detect a loop of Phi nodes being processed by this method, for
// example:
//
// %a = phi i32 [ %some1, %preheader ], [ %b, %latch ]
// %b = phi i32 [ %some2, %preheader ], [ %a, %latch ]
//
// we don't want to deal with a case that complex, so we return the
// conservative answer false.
if (!PendingMerges.insert(Phi).second)
return false;
RPhi = Phi;
}
// If none of LHS, RHS is a Phi, nothing to do here.
if (!LPhi && !RPhi)
return false;
// If there is a SCEVUnknown Phi we are interested in, make it left.
if (!LPhi) {
std::swap(LHS, RHS);
std::swap(FoundLHS, FoundRHS);
std::swap(LPhi, RPhi);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!");
const BasicBlock *LBB = LPhi->getParent();
const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) {
return isKnownViaNonRecursiveReasoning(Pred, S1, S2) ||
isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) ||
isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth);
};
if (RPhi && RPhi->getParent() == LBB) {
// Case one: RHS is also a SCEVUnknown Phi from the same basic block.
// If we compare two Phis from the same block, and for each predecessor block
// the predicate is true for the incoming values from that block, then the
// predicate is also true for the Phis.
for (const BasicBlock *IncBB : predecessors(LBB)) {
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB));
if (!ProvedEasily(L, R))
return false;
}
} else if (RAR && RAR->getLoop()->getHeader() == LBB) {
// Case two: RHS is an AddRec whose loop header is the same basic block. This
// means that there is a loop which has both AddRec and Unknown PHIs; for such
// a loop we can compare the incoming values of the AddRec from above the loop
// and from the latch with the respective incoming values of LPhi.
// TODO: Generalize to handle loops with many inputs in a header.
if (LPhi->getNumIncomingValues() != 2) return false;
auto *RLoop = RAR->getLoop();
auto *Predecessor = RLoop->getLoopPredecessor();
assert(Predecessor && "Loop with AddRec with no predecessor?");
const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor));
if (!ProvedEasily(L1, RAR->getStart()))
return false;
auto *Latch = RLoop->getLoopLatch();
assert(Latch && "Loop with AddRec with no latch?");
const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch));
if (!ProvedEasily(L2, RAR->getPostIncExpr(*this)))
return false;
} else {
// In all other cases, go over the inputs of LHS and compare each of them to
// RHS; the predicate is true for (LHS, RHS) if it is true for all such pairs.
// At this point RHS is either a non-Phi, or it is a Phi from some block
// different from LBB.
for (const BasicBlock *IncBB : predecessors(LBB)) {
// Check that RHS is available in this block.
if (!dominates(RHS, IncBB))
return false;
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
// Make sure L does not refer to a value from a potentially previous
// iteration of a loop.
if (!properlyDominates(L, IncBB))
return false;
if (!ProvedEasily(L, RHS))
return false;
}
}
return true;
}
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
const Instruction *Context) {
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
Context))
return true;
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS);
}
/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
template <typename MinMaxExprType>
static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
const SCEV *Candidate) {
const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
if (!MinMaxExpr)
return false;
return is_contained(MinMaxExpr->operands(), Candidate);
}
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// If both sides are affine addrecs for the same loop, with equal
// steps, and we know the recurrences don't wrap, then we only
// need to check the predicate on the starting values.
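// For example (illustrative recurrences): for two nsw affine addrecs
// {1,+,2} and {5,+,2} over the same loop, 1 s< 5 at the start implies
// {1,+,2} s< {5,+,2} on every iteration, since both advance by the same step.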
if (!ICmpInst::isRelational(Pred))
return false;
const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
if (!LAR)
return false;
const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
if (!RAR)
return false;
if (LAR->getLoop() != RAR->getLoop())
return false;
if (!LAR->isAffine() || !RAR->isAffine())
return false;
if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
return false;
SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
SCEV::FlagNSW : SCEV::FlagNUW;
if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
return false;
return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
}
/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
/// expression?
static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
switch (Pred) {
default:
return false;
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
// min(A, ...) <= A
IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
// A <= max(A, ...)
IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
// min(A, ...) <= A
IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
// A <= max(A, ...)
IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
}
llvm_unreachable("covered switch fell through?!");
}
bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
unsigned Depth) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(RHS->getType()) &&
"LHS and RHS have different sizes?");
assert(getTypeSizeInBits(FoundLHS->getType()) ==
getTypeSizeInBits(FoundRHS->getType()) &&
"FoundLHS and FoundRHS have different sizes?");
// We want to avoid hurting the compile time with analysis of too big trees.
if (Depth > MaxSCEVOperationsImplicationDepth)
return false;
// We only want to work with GT comparison so far.
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT) {
Pred = CmpInst::getSwappedPredicate(Pred);
std::swap(LHS, RHS);
std::swap(FoundLHS, FoundRHS);
}
// For unsigned, try to reduce it to corresponding signed comparison.
if (Pred == ICmpInst::ICMP_UGT)
// We can replace unsigned predicate with its signed counterpart if all
// involved values are non-negative.
// TODO: We could have better support for unsigned.
if (isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) {
// Knowing that both FoundLHS and FoundRHS are non-negative, and knowing
// FoundLHS >u FoundRHS, we also know that FoundLHS >s FoundRHS. Let us
// use this fact to prove that LHS and RHS are non-negative.
const SCEV *MinusOne = getMinusOne(LHS->getType());
if (isImpliedCondOperands(ICmpInst::ICMP_SGT, LHS, MinusOne, FoundLHS,
FoundRHS) &&
isImpliedCondOperands(ICmpInst::ICMP_SGT, RHS, MinusOne, FoundLHS,
FoundRHS))
Pred = ICmpInst::ICMP_SGT;
}
if (Pred != ICmpInst::ICMP_SGT)
return false;
auto GetOpFromSExt = [&](const SCEV *S) {
if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
return Ext->getOperand();
// TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
// the constant in some cases.
return S;
};
// Acquire values from extensions.
auto *OrigLHS = LHS;
auto *OrigFoundLHS = FoundLHS;
LHS = GetOpFromSExt(LHS);
FoundLHS = GetOpFromSExt(FoundLHS);
// Checks whether the SGT predicate can be proved trivially or using the
// found context.
auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
FoundRHS, Depth + 1);
};
if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
// We want to avoid creation of any new non-constant SCEV. Since we are
// going to compare the operands to RHS, we should be certain that we don't
// need any size extensions for this. So let's decline all cases when the
// sizes of types of LHS and RHS do not match.
// TODO: Maybe try to get RHS from sext to catch more cases?
if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
return false;
// Should not overflow.
if (!LHSAddExpr->hasNoSignedWrap())
return false;
auto *LL = LHSAddExpr->getOperand(0);
auto *LR = LHSAddExpr->getOperand(1);
auto *MinusOne = getMinusOne(RHS->getType());
// Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
};
// Try to prove the following rule:
// (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
// (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
return true;
} else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
Value *LL, *LR;
// FIXME: Once we have SDiv implemented, we can get rid of this matching.
using namespace llvm::PatternMatch;
if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
// Rules for division.
// We are going to perform some comparisons with Denominator and its
// derivative expressions. In the general case, creating a SCEV for it may
// lead to a complex analysis of the entire graph, and in particular it can
// request a trip count recalculation for the same loop. That result would be
// cached as SCEVCouldNotCompute to avoid infinite recursion. To avoid this,
// we only want to create SCEVs that are constants in this section.
// So we bail if Denominator is not a constant.
if (!isa<ConstantInt>(LR))
return false;
auto *Denominator = cast<SCEVConstant>(getSCEV(LR));
// We want to make sure that LHS = FoundLHS / Denominator. If it is so,
// then a SCEV for the numerator already exists and matches with FoundLHS.
auto *Numerator = getExistingSCEV(LL);
if (!Numerator || Numerator->getType() != FoundLHS->getType())
return false;
// Make sure that the numerator matches with FoundLHS and the denominator
// is positive.
if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
return false;
auto *DTy = Denominator->getType();
auto *FRHSTy = FoundRHS->getType();
if (DTy->isPointerTy() != FRHSTy->isPointerTy())
// One of types is a pointer and another one is not. We cannot extend
// them properly to a wider type, so let us just reject this case.
// TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
// to avoid this check.
return false;
// Given that:
// FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
auto *WTy = getWiderType(DTy, FRHSTy);
auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);
// Try to prove the following rule:
// (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
// For example, given that FoundLHS > 2. It means that FoundLHS is at
// least 3. If we divide it by Denominator < 4, we will have at least 1.
auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
if (isKnownNonPositive(RHS) &&
IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
return true;
// Try to prove the following rule:
// (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
// For example, given that FoundLHS > -3. Then FoundLHS is at least -2.
// If we divide it by Denominator > 2, then:
// 1. If FoundLHS is negative, then the result is 0.
// 2. If FoundLHS is non-negative, then the result is non-negative.
// Anyways, the result is non-negative.
auto *MinusOne = getMinusOne(WTy);
auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
if (isKnownNegative(RHS) &&
IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
return true;
}
}
// If our expression contained SCEVUnknown Phis, and we split it down and now
// need to prove something for them, try to prove the predicate for all
// possible incoming values of those Phis.
if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1))
return true;
return false;
}
static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// zext x u<= sext x, sext x s<= zext x
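// For instance, with x of type i8 extended to i16 (illustrative values):
// if x = -1, zext(x) = 255 while sext(x) = 0xFFFF, so 255 u<= 65535 and
// -1 s<= 255; for x >= 0 both extensions are equal.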
switch (Pred) {
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE: {
// If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt.
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS);
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS);
if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
return true;
break;
}
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE: {
// If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt.
const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS);
const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS);
if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
return true;
break;
}
default:
break;
};
return false;
}
bool
ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
}
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
// Maybe it can be proved via operations?
if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
return false;
}
bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
// The restriction on `FoundRHS` could be lifted easily -- it exists only to
// reduce the compile time impact of this optimization.
return false;
Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
if (!Addend)
return false;
const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
// `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
// antecedent "`FoundLHS` `Pred` `FoundRHS`".
ConstantRange FoundLHSRange =
ConstantRange::makeExactICmpRegion(Pred, ConstFoundRHS);
// Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
// The antecedent implies the consequent if every value of `LHS` that
// satisfies the antecedent also satisfies the consequent.
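// A worked example (hypothetical constants) with Pred = ICMP_ULT: if
// FoundRHS = 10, then FoundLHSRange is [0, 10). With Addend = 5, LHSRange
// becomes [5, 15), and for RHS = 20 every value in [5, 15) is u< 20, so the
// implication holds.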
return LHSRange.icmp(Pred, ConstRHS);
}
bool ScalarEvolution::canIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned) {
assert(isKnownPositive(Stride) && "Positive stride expected!");
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MaxRHS = getSignedRangeMax(RHS);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
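// E.g. with i8 signed ranges (illustrative values): MaxStrideMinusOne = 9
// gives 127 - 9 = 118; if MaxRHS = 120 then 118 s< 120 and the IV could step
// past RHS and wrap, whereas MaxRHS = 100 gives 118 s< 100 == false, ruling
// overflow out.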
return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS);
}
APInt MaxRHS = getUnsignedRangeMax(RHS);
APInt MaxValue = APInt::getMaxValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS);
}
bool ScalarEvolution::canIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned) {
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *One = getOne(Stride->getType());
if (IsSigned) {
APInt MinRHS = getSignedRangeMin(RHS);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One));
// SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS);
}
APInt MinRHS = getUnsignedRangeMin(RHS);
APInt MinValue = APInt::getMinValue(BitWidth);
APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One));
// UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS);
}
const SCEV *ScalarEvolution::getUDivCeilSCEV(const SCEV *N, const SCEV *D) {
// umin(N, 1) + floor((N - umin(N, 1)) / D)
// This is equivalent to "1 + floor((N - 1) / D)" for N != 0. The umin
// expression fixes the case of N=0.
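// For example: N = 7, D = 3 gives umin(7, 1) = 1 plus floor(6 / 3) = 2,
// i.e. 3, which equals ceil(7 / 3); N = 0 gives 0 + floor(0 / D) = 0.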
const SCEV *MinNOne = getUMinExpr(N, getOne(N->getType()));
const SCEV *NMinusOne = getMinusSCEV(N, MinNOne);
return getAddExpr(MinNOne, getUDivExpr(NMinusOne, D));
}
const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
const SCEV *Stride,
const SCEV *End,
unsigned BitWidth,
bool IsSigned) {
// The logic in this function assumes we can represent a positive stride.
// If we can't, the backedge-taken count must be zero.
if (IsSigned && BitWidth == 1)
return getZero(Stride->getType());
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
APInt MinStart =
IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start);
APInt MinStride =
IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride);
// We assume either the stride is positive, or the backedge-taken count
// is zero. So force StrideForMaxBECount to be at least one.
APInt One(BitWidth, 1);
APInt StrideForMaxBECount = IsSigned ? APIntOps::smax(One, MinStride)
: APIntOps::umax(One, MinStride);
APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth)
: APInt::getMaxValue(BitWidth);
APInt Limit = MaxValue - (StrideForMaxBECount - 1);
// Although End can be a MAX expression we estimate MaxEnd considering only
// the case End = RHS of the loop termination condition. This is safe because
// in the other case (End - Start) is zero, leading to a zero maximum backedge
// taken count.
APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit)
: APIntOps::umin(getUnsignedRangeMax(End), Limit);
// MaxBECount = ceil((max(MaxEnd, MinStart) - MinStart) / Stride)
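// For instance (unsigned, illustrative range bounds): MinStart = 0,
// MaxEnd = 100 and StrideForMaxBECount = 4 give ceil(100 / 4) = 25.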
MaxEnd = IsSigned ? APIntOps::smax(MaxEnd, MinStart)
: APIntOps::umax(MaxEnd, MinStart);
return getUDivCeilSCEV(getConstant(MaxEnd - MinStart) /* Delta */,
getConstant(StrideForMaxBECount) /* Step */);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
bool PredicatedIV = false;
if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
PredicatedIV = true;
}
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
// A precondition of this method is that the condition being analyzed
// reaches an exiting branch which dominates the latch. Given that, we can
// assume that an increment which violates the nowrap specification and
// produces poison must cause undefined behavior when the resulting poison
// value is branched upon and thus we can conclude that the backedge is
// taken no more often than would be required to produce that poison value.
// Note that a well defined loop can exit on the iteration which violates
// the nowrap specification if there is another exit (either explicit or
// implicit/exceptional) which causes the loop to execute before the
// exiting instruction we're analyzing would trigger UB.
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
const SCEV *Stride = IV->getStepRecurrence(*this);
bool PositiveStride = isKnownPositive(Stride);
// Avoid negative or zero stride values.
if (!PositiveStride) {
// We can compute the correct backedge taken count for loops with unknown
// strides if we can prove that the loop is not an infinite loop with side
// effects. Here's the loop structure we are trying to handle -
//
// i = start
// do {
// A[i] = i;
// i += s;
// } while (i < end);
//
// The backedge taken count for such loops is evaluated as -
// (max(end, start + stride) - start - 1) /u stride
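// For example (illustrative values): start = 0, stride = 3, end = 10 gives
// (max(10, 3) - 0 - 1) /u 3 = 9 /u 3 = 3; the backedge is taken after the
// iterations with i = 0, 3 and 6, and the iteration with i = 9 exits.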
//
// The additional preconditions that we need to check to prove correctness
// of the above formula is as follows -
//
// a) IV is either nuw or nsw depending upon signedness (indicated by the
// NoWrap flag).
// b) loop is single exit with no side effects.
//
//
// Precondition a) implies that if the stride is negative, this is a single
// trip loop. The backedge taken count formula reduces to zero in this case.
//
// Precondition b) implies that if rhs is invariant in L, then unknown
// stride being zero means the backedge can't be taken without UB.
//
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
// We want to make sure that the stride is truly unknown as there are edge
// cases where ScalarEvolution propagates no wrap flags to the
// post-increment/decrement IV even though the increment/decrement operation
// itself is wrapping. The computed backedge taken count may be wrong in
// such cases. This is prevented by checking that the stride is not known to
// be either positive or non-positive. For example, no wrap flags are
// propagated to the post-increment IV of this loop with a trip count of 2 -
//
// unsigned char i;
// for(i=127; i<128; i+=129)
// A[i] = i;
//
if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
!loopIsFiniteByAssumption(L))
return getCouldNotCompute();
if (!isKnownNonZero(Stride)) {
// If we have a step of zero, and RHS isn't invariant in L, we don't know
// if it might eventually be greater than start and if so, on which
// iteration. We can't even produce a useful upper bound.
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
// We allow a potentially zero stride, but we need to divide by stride
// below. Since the loop can't be infinite and this check must control
// the sole exit, we can infer the exit must be taken on the first
// iteration (e.g. backedge count = 0) if the stride is zero. Given that,
// we know the numerator in the divides below must be zero, so we can
// pick an arbitrary non-zero value for the denominator (e.g. stride)
// and produce the right result.
// FIXME: Handle the case where Stride is poison?
auto wouldZeroStrideBeUB = [&]() {
// Proof by contradiction. Suppose the stride were zero. If we can
// prove that the backedge *is* taken on the first iteration, then since
// we know this condition controls the sole exit, we must have an
// infinite loop. We can't have a (well defined) infinite loop per
// check just above.
// Note: The (Start - Stride) term is used to get the start' term from
// (start' + stride,+,stride). Remember that we only care about the
// result of this expression when stride == 0 at runtime.
auto *StartIfZero = getMinusSCEV(IV->getStart(), Stride);
return isLoopEntryGuardedByCond(L, Cond, StartIfZero, RHS);
};
if (!wouldZeroStrideBeUB()) {
Stride = getUMaxExpr(Stride, getOne(Stride->getType()));
}
}
} else if (!Stride->isOne() && !NoWrap) {
auto isUBOnWrap = [&]() {
// Can we prove this loop *must* be UB if overflow of IV occurs?
// Reasoning goes as follows:
// * Suppose the IV did self wrap.
// * If Stride evenly divides the iteration space, then once wrap
// occurs, the loop must revisit the same values.
// * We know that RHS is invariant, and that none of those values
// caused this exit to be taken previously. Thus, this exit is
// dynamically dead.
// * If this is the sole exit, then a dead exit implies the loop
// must be infinite if there are no abnormal exits.
// * If the loop were infinite, then it must either not be mustprogress
// or have side effects. Otherwise, it must be UB.
// * It can't (by assumption), be UB so we have contradicted our
// premise and can conclude the IV did not in fact self-wrap.
// From no-self-wrap, we need to then prove no-(un)signed-wrap. This
// follows trivially from the fact that every (un)signed-wrapped, but not
// self-wrapped, value must be less than the last value before the
// (un)signed wrap. Since we know that last value didn't cause an exit,
// neither will any smaller one.
if (!isLoopInvariant(RHS, L))
return false;
auto *StrideC = dyn_cast<SCEVConstant>(Stride);
if (!StrideC || !StrideC->getAPInt().isPowerOf2())
return false;
if (!ControlsExit || !loopHasNoAbnormalExits(L))
return false;
return loopIsFiniteByAssumption(L);
};
// Avoid proven overflow cases: this will ensure that the backedge taken
// count will not generate any unsigned overflow. Relaxed no-overflow
// conditions exploit NoWrapFlags, allowing to optimize in presence of
// undefined behaviors like the case of C language.
if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
return getCouldNotCompute();
}
// On all paths just preceding, we established the following invariant:
// IV can be assumed not to overflow up to and including the exiting
// iteration. We proved this in one of two ways:
// 1) We can show overflow doesn't occur before the exiting iteration
// 1a) canIVOverflowOnLT, and b) step of one
// 2) We can show that if overflow occurs, the loop must execute UB
// before any possible exit.
// Note that we have not yet proved RHS invariant (in general).
const SCEV *Start = IV->getStart();
// Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
// Use integer-typed versions for actual computation.
const SCEV *OrigStart = Start;
const SCEV *OrigRHS = RHS;
if (Start->getType()->isPointerTy()) {
Start = getLosslessPtrToIntExpr(Start);
if (isa<SCEVCouldNotCompute>(Start))
return Start;
}
if (RHS->getType()->isPointerTy()) {
RHS = getLosslessPtrToIntExpr(RHS);
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
// When the RHS is not invariant, we do not know the end bound of the loop and
// cannot calculate the ExactBECount needed by ExitLimit. However, we can
// calculate the MaxBECount, given the start, stride and max value for the end
// bound of the loop (RHS), and the fact that IV does not overflow (which is
// checked above).
if (!isLoopInvariant(RHS, L)) {
const SCEV *MaxBECount = computeMaxBECountForLT(
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount,
false /*MaxOrZero*/, Predicates);
}
// We use the expression (max(End,Start)-Start)/Stride to describe the
// backedge count, as if the backedge is taken at least once max(End,Start)
// is End and so the result is as above, and if not max(End,Start) is Start
// so we get a backedge count of zero.
const SCEV *BECount = nullptr;
auto *StartMinusStride = getMinusSCEV(OrigStart, Stride);
// Can we prove max(RHS,Start) > Start - Stride?
if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) &&
isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) {
// In this case, we can use a refined formula for computing backedge taken
// count. The general formula remains:
// "End-Start /uceiling Stride" where "End = max(RHS,Start)"
// We want to use the alternate formula:
// "((End - 1) - (Start - Stride)) /u Stride"
// Let's do a quick case analysis to show these are equivalent under
// our precondition that max(RHS,Start) > Start - Stride.
// * For RHS <= Start, the backedge-taken count must be zero.
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((Start - 1) - (Start - Stride)) /u Stride" which simplies to
// "Stride - 1 /u Stride" which is indeed zero for all non-zero values
// of Stride. For 0 stride, we've use umin(1,Stride) above, reducing
// this to the stride of 1 case.
// * For RHS >= Start, the backedge count must be "RHS-Start /uceil Stride".
// "((End - 1) - (Start - Stride)) /u Stride" reduces to
// "((RHS - 1) - (Start - Stride)) /u Stride" reassociates to
// "((RHS - (Start - Stride) - 1) /u Stride".
// Our preconditions trivially imply no overflow in that form.
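// As a concrete check (illustrative values): Start = 0, Stride = 3 and
// RHS = End = 10 (unsigned) give ((10 - 1) - (0 - 3)) /u 3 = 12 /u 3 = 4,
// the same as the general form ceil((10 - 0) / 3) = 4.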
const SCEV *MinusOne = getMinusOne(Stride->getType());
const SCEV *Numerator =
getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride);
if (!isa<SCEVCouldNotCompute>(Numerator)) {
BECount = getUDivExpr(Numerator, Stride);
}
}
const SCEV *BECountIfBackedgeTaken = nullptr;
if (!BECount) {
auto canProveRHSGreaterThanEqualStart = [&]() {
auto CondGE = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
if (isLoopEntryGuardedByCond(L, CondGE, OrigRHS, OrigStart))
return true;
// (RHS > Start - 1) implies RHS >= Start.
// * "RHS >= Start" is trivially equivalent to "RHS > Start - 1" if
// "Start - 1" doesn't overflow.
// * For signed comparison, if Start - 1 does overflow, it's equal
// to INT_MAX, and "RHS >s INT_MAX" is trivially false.
// * For unsigned comparison, if Start - 1 does overflow, it's equal
// to UINT_MAX, and "RHS >u UINT_MAX" is trivially false.
//
// FIXME: Should isLoopEntryGuardedByCond do this for us?
auto CondGT = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
auto *StartMinusOne = getAddExpr(OrigStart,
getMinusOne(OrigStart->getType()));
return isLoopEntryGuardedByCond(L, CondGT, OrigRHS, StartMinusOne);
};
// If we know that RHS >= Start in the context of loop, then we know that
// max(RHS, Start) = RHS at this point.
const SCEV *End;
if (canProveRHSGreaterThanEqualStart()) {
End = RHS;
} else {
// If RHS < Start, the backedge will be taken zero times. So in
// general, we can write the backedge-taken count as:
//
// RHS >= Start ? ceil(RHS - Start) / Stride : 0
//
// We convert it to the following to make it more convenient for SCEV:
//
// ceil(max(RHS, Start) - Start) / Stride
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
// See what would happen if we assume the backedge is taken. This is
// used to compute MaxBECount.
BECountIfBackedgeTaken = getUDivCeilSCEV(getMinusSCEV(RHS, Start), Stride);
}
// At this point, we know:
//
// 1. If IsSigned, Start <=s End; otherwise, Start <=u End
// 2. The index variable doesn't overflow.
//
// Therefore, we know N exists such that
// (Start + Stride * N) >= End, and computing "(Start + Stride * N)"
// doesn't overflow.
//
// Using this information, try to prove whether the addition in
// "(Start - End) + (Stride - 1)" has unsigned overflow.
const SCEV *One = getOne(Stride->getType());
bool MayAddOverflow = [&] {
if (auto *StrideC = dyn_cast<SCEVConstant>(Stride)) {
if (StrideC->getAPInt().isPowerOf2()) {
// Suppose Stride is a power of two, and Start/End are unsigned
// integers. Let UMAX be the largest representable unsigned
// integer.
//
// By the preconditions of this function, we know
// "(Start + Stride * N) >= End", and this doesn't overflow.
// As a formula:
//
// End <= (Start + Stride * N) <= UMAX
//
// Subtracting Start from all the terms:
//
// End - Start <= Stride * N <= UMAX - Start
//
// Since Start is unsigned, UMAX - Start <= UMAX. Therefore:
//
// End - Start <= Stride * N <= UMAX
//
// Stride * N is a multiple of Stride. Therefore,
//
// End - Start <= Stride * N <= UMAX - (UMAX mod Stride)
//
// Since Stride is a power of two, UMAX + 1 is divisible by Stride.
// Therefore, UMAX mod Stride == Stride - 1. So we can write:
//
// End - Start <= Stride * N <= UMAX - Stride + 1
//
// Dropping the middle term:
//
// End - Start <= UMAX - Stride + 1
//
// Adding Stride - 1 to both sides:
//
// (End - Start) + (Stride - 1) <= UMAX
//
// In other words, the addition doesn't have unsigned overflow.
//
// A similar proof works if we treat Start/End as signed values.
// Just rewrite steps before "End - Start <= Stride * N <= UMAX" to
// use signed max instead of unsigned max. Note that we're trying
// to prove a lack of unsigned overflow in either case.
return false;
}
}
if (Start == Stride || Start == getMinusSCEV(Stride, One)) {
// If Start is equal to Stride, (End - Start) + (Stride - 1) == End - 1.
// If !IsSigned, 0 <u Stride == Start <=u End; so 0 <u End - 1 <u End.
// If IsSigned, 0 <s Stride == Start <=s End; so 0 <s End - 1 <s End.
//
// If Start is equal to Stride - 1, (End - Start) + Stride - 1 == End.
return false;
}
return true;
}();
const SCEV *Delta = getMinusSCEV(End, Start);
if (!MayAddOverflow) {
// floor((D + (S - 1)) / S)
// We prefer this formulation if it's legal because it's fewer operations.
BECount =
getUDivExpr(getAddExpr(Delta, getMinusSCEV(Stride, One)), Stride);
} else {
BECount = getUDivCeilSCEV(Delta, Stride);
}
}
const SCEV *MaxBECount;
bool MaxOrZero = false;
if (isa<SCEVConstant>(BECount)) {
MaxBECount = BECount;
} else if (BECountIfBackedgeTaken &&
isa<SCEVConstant>(BECountIfBackedgeTaken)) {
// If we know exactly how many times the backedge will be taken if it's
// taken at least once, then the backedge count will either be that or
// zero.
MaxBECount = BECountIfBackedgeTaken;
MaxOrZero = true;
} else {
MaxBECount = computeMaxBECountForLT(
Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned);
}
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
ScalarEvolution::ExitLimit
ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
if (!IV && AllowPredicates)
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
// Avoid negative or zero stride values
if (!isKnownPositive(Stride))
return getCouldNotCompute();
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
// exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && !NoWrap)
if (canIVOverflowOnGT(RHS, Stride, IsSigned))
return getCouldNotCompute();
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
// If we know that Start >= RHS in the context of loop, then we know that
// min(RHS, Start) = RHS at this point.
if (isLoopEntryGuardedByCond(
L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS))
End = RHS;
else
End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
}
if (Start->getType()->isPointerTy()) {
Start = getLosslessPtrToIntExpr(Start);
if (isa<SCEVCouldNotCompute>(Start))
return Start;
}
if (End->getType()->isPointerTy()) {
End = getLosslessPtrToIntExpr(End);
if (isa<SCEVCouldNotCompute>(End))
return End;
}
// Compute ((Start - End) + (Stride - 1)) / Stride.
// FIXME: This can overflow. Holding off on fixing this for now;
// howManyGreaterThans will hopefully be gone soon.
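// For illustration (hypothetical values): Start = 10, End = 0, Stride = 3
// gives ((10 - 0) + (3 - 1)) /u 3 = 4, matching an IV that takes the values
// 10, 7, 4 and 1 before dropping to -2 and exiting.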
const SCEV *One = getOne(Stride->getType());
const SCEV *BECount = getUDivExpr(
getAddExpr(getMinusSCEV(Start, End), getMinusSCEV(Stride, One)), Stride);
APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
: getUnsignedRangeMax(Start);
APInt MinStride = IsSigned ? getSignedRangeMin(Stride)
: getUnsignedRangeMin(Stride);
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
: APInt::getMinValue(BitWidth) + (MinStride - 1);
// Although End can be a MIN expression we estimate MinEnd considering only
// the case End = RHS. This is safe because in the other case (Start - End)
// is zero, leading to a zero maximum backedge taken count.
APInt MinEnd =
IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
: APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
? BECount
: getUDivCeilSCEV(getConstant(MaxStart - MinEnd),
getConstant(MinStride));
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
ScalarEvolution &SE) const {
if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute();
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
SmallVector<const SCEV *, 4> Operands(operands());
Operands[0] = SE.getZero(SC->getType());
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
getNoWrapFlags(FlagNW));
if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
Range.subtract(SC->getAPInt()), SE);
// This is strange and shouldn't happen.
return SE.getCouldNotCompute();
}
// The only time we can solve this is when we have all constant indices.
// Otherwise, we cannot determine the overflow conditions.
if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
return SE.getCouldNotCompute();
// Okay at this point we know that all elements of the chrec are constants and
// that the start element is zero.
// First check to see if the range contains zero. If not, the first
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
return SE.getZero(getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
// Solve {0,+,A} in Range === Ax in Range
// We know that zero is in the range. If A is positive then we know that
// the upper value of the range must be the first possible exit value.
// If A is negative then the lower of the range is the last possible loop
// value. Also note that we already checked for a full range.
APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower();
// The exit value should be (End+A)/A.
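// For instance (illustrative values): Range = [0, 10) and A = 3 give End = 9
// and ExitVal = (9 + 3) /u 3 = 4; {0,+,3} evaluates to 12 at iteration 4
// (outside the range) and to 9 at iteration 3 (still inside), so 4 is
// returned.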
APInt ExitVal = (End + A).udiv(A);
ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
// Evaluate at the exit value. If we really did fall out of the valid
// range, then we computed our trip count, otherwise wrap around or other
// things must have happened.
ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
if (Range.contains(Val->getValue()))
return SE.getCouldNotCompute(); // Something strange happened
// Ensure that the previous value is in the range. This is a sanity check.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
}
if (isQuadratic()) {
if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
return SE.getConstant(S.getValue());
}
return SE.getCouldNotCompute();
}
const SCEVAddRecExpr *
SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
assert(getNumOperands() > 1 && "AddRec with zero step?");
// There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)),
// but in this case we cannot guarantee that the value returned will be an
// AddRec because SCEV does not have a fixed point where it stops
// simplification: it is legal to return ({rec1} + {rec2}). For example, it
// may happen if we reach arithmetic depth limit while simplifying. So we
// construct the returned value explicitly.
SmallVector<const SCEV *, 3> Ops;
// If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and
// (this + Step) is {A+B,+,B+C,+...,+,N}.
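// For example (illustrative recurrence): for {1,+,2,+,3}, the step is
// {2,+,3}, and the loop below builds {1+2,+,2+3,+,3} = {3,+,5,+,3}.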
for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i)
Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1)));
// We know that the last operand is not a constant zero (otherwise it would
// have been popped out earlier). This guarantees us that if the result has
// the same last operand, then it will also not be popped out, meaning that
// the returned value will be an AddRec.
const SCEV *Last = getOperand(getNumOperands() - 1);
assert(!Last->isZero() && "Recurrency with zero step?");
Ops.push_back(Last);
return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(),
SCEV::FlagAnyWrap));
}
// Return true when S contains at least one undef value.
static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
return false;
});
}
namespace {
// Collect all steps of SCEV expressions.
struct SCEVCollectStrides {
ScalarEvolution &SE;
SmallVectorImpl<const SCEV *> &Strides;
SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
: SE(SE), Strides(S) {}
bool follow(const SCEV *S) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
Strides.push_back(AR->getStepRecurrence(SE));
return true;
}
bool isDone() const { return false; }
};
// Collect all SCEVUnknown and SCEVMulExpr expressions.
struct SCEVCollectTerms {
SmallVectorImpl<const SCEV *> &Terms;
SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
bool follow(const SCEV *S) {
if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
isa<SCEVSignExtendExpr>(S)) {
if (!containsUndefs(S))
Terms.push_back(S);
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
// Check if a SCEV contains an AddRecExpr.
struct SCEVHasAddRec {
bool &ContainsAddRec;
SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
ContainsAddRec = false;
}
bool follow(const SCEV *S) {
if (isa<SCEVAddRecExpr>(S)) {
ContainsAddRec = true;
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
// Find factors that are multiplied with an expression that (possibly as a
// subexpression) contains an AddRecExpr. In the expression:
//
// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
//
// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
// parameters as they form a product with an induction variable.
//
// This collector expects all array size parameters to be in the same MulExpr.
// It might be necessary to later add support for collecting parameters that are
// spread over different nested MulExpr.
struct SCEVCollectAddRecMultiplies {
SmallVectorImpl<const SCEV *> &Terms;
ScalarEvolution &SE;
SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
: Terms(T), SE(SE) {}
bool follow(const SCEV *S) {
if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
bool HasAddRec = false;
SmallVector<const SCEV *, 0> Operands;
for (auto Op : Mul->operands()) {
const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
if (Unknown && !isa<CallInst>(Unknown->getValue())) {
Operands.push_back(Op);
} else if (Unknown) {
HasAddRec = true;
} else {
bool ContainsAddRec = false;
SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
visitAll(Op, ContainsAddRecVisitor);
HasAddRec |= ContainsAddRec;
}
}
if (Operands.size() == 0)
return true;
if (!HasAddRec)
return false;
Terms.push_back(SE.getMulExpr(Operands));
// Stop recursion: once we collected a term, do not walk its operands.
return false;
}
// Keep looking.
return true;
}
bool isDone() const { return false; }
};
} // end anonymous namespace
/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters
/// in two places:
/// 1) The strides of AddRec expressions.
/// 2) Unknowns that are multiplied with AddRec expressions.
void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Terms) {
SmallVector<const SCEV *, 4> Strides;
SCEVCollectStrides StrideCollector(*this, Strides);
visitAll(Expr, StrideCollector);
LLVM_DEBUG({
dbgs() << "Strides:\n";
for (const SCEV *S : Strides)
dbgs() << *S << "\n";
});
for (const SCEV *S : Strides) {
SCEVCollectTerms TermCollector(Terms);
visitAll(S, TermCollector);
}
LLVM_DEBUG({
dbgs() << "Terms:\n";
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
visitAll(Expr, MulCollector);
}
static bool findArrayDimensionsRec(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes) {
int Last = Terms.size() - 1;
const SCEV *Step = Terms[Last];
// End of recursion.
if (Last == 0) {
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
SmallVector<const SCEV *, 2> Qs;
for (const SCEV *Op : M->operands())
if (!isa<SCEVConstant>(Op))
Qs.push_back(Op);
Step = SE.getMulExpr(Qs);
}
Sizes.push_back(Step);
return true;
}
for (const SCEV *&Term : Terms) {
// Normalize the terms before the next call to findArrayDimensionsRec.
const SCEV *Q, *R;
SCEVDivision::divide(SE, Term, Step, &Q, &R);
// Bail out when GCD does not evenly divide one of the terms.
if (!R->isZero())
return false;
Term = Q;
}
// Remove all SCEVConstants.
erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
return false;
Sizes.push_back(Step);
return true;
}
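// Worked example (a sketch): starting from Terms = {%m * %o, %o},
// findArrayDimensionsRec takes the last term %o as Step, divides the other
// terms by it with zero remainder leaving Terms = {%m}, bottoms out by
// pushing %m, and pushes %o while unwinding, so Sizes = {%m, %o}. The caller
// appends the element size afterwards.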
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
return true;
return false;
}
// Return the number of product terms in S.
static inline int numberOfTerms(const SCEV *S) {
if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
return Expr->getNumOperands();
return 1;
}
static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
if (isa<SCEVConstant>(T))
return nullptr;
if (isa<SCEVUnknown>(T))
return T;
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
SmallVector<const SCEV *, 2> Factors;
for (const SCEV *Op : M->operands())
if (!isa<SCEVConstant>(Op))
Factors.push_back(Op);
return SE.getMulExpr(Factors);
}
return T;
}
/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
Type *Ty;
if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
Ty = Store->getValueOperand()->getType();
else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
Ty = Load->getType();
else
return nullptr;
Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
return getSizeOfExpr(ETy, Ty);
}
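// For example, for a hypothetical "store double %v, double* %p",
// getElementSize returns the SCEV for sizeof(double), i.e. a constant 8 in
// the pointer-width integer type.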
void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) {
if (Terms.size() < 1 || !ElementSize)
return;
// Early return when Terms do not contain parameters: we do not delinearize
// non-parametric SCEVs.
if (!containsParameters(Terms))
return;
LLVM_DEBUG({
dbgs() << "Terms:\n";
for (const SCEV *T : Terms)
dbgs() << *T << "\n";
});
// Remove duplicates.
array_pod_sort(Terms.begin(), Terms.end());
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
// Try to divide all terms by the element size. If term is not divisible by
// element size, proceed with the original term.
for (const SCEV *&Term : Terms) {
const SCEV *Q, *R;
SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
if (!Q->isZero())
Term = Q;
}
SmallVector<const SCEV *, 4> NewTerms;
// Remove constant factors.
for (const SCEV *T : Terms)
if (const SCEV *NewT = removeConstantFactors(*this, T))
NewTerms.push_back(NewT);
LLVM_DEBUG({
dbgs() << "Terms after sorting:\n";
for (const SCEV *T : NewTerms)
dbgs() << *T << "\n";
});
if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
Sizes.clear();
return;
}
// The last element to be pushed into Sizes is the size of an element.
Sizes.push_back(ElementSize);
LLVM_DEBUG({
dbgs() << "Sizes:\n";
for (const SCEV *S : Sizes)
dbgs() << *S << "\n";
});
}
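// Putting the helpers together (a sketch, reusing the example documented
// before delinearize() below): with Terms = {(8 * %m * %o), (8 * %o)} and
// ElementSize = 8, dividing out the element size and dropping constant
// factors leaves {%m * %o, %o}; findArrayDimensionsRec turns that into
// {%m, %o}, and the element size is appended, giving Sizes = {%m, %o, 8},
// i.e. ArrayDecl[UnknownSize][%m][%o] with 8-byte elements.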
void ScalarEvolution::computeAccessFunctions(
const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes) {
// Early exit in case this SCEV is not an affine multivariate function.
if (Sizes.empty())
return;
if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
if (!AR->isAffine())
return;
const SCEV *Res = Expr;
int Last = Sizes.size() - 1;
for (int i = Last; i >= 0; i--) {
const SCEV *Q, *R;
SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
LLVM_DEBUG({
dbgs() << "Res: " << *Res << "\n";
dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
dbgs() << "Res divided by Sizes[i]:\n";
dbgs() << "Quotient: " << *Q << "\n";
dbgs() << "Remainder: " << *R << "\n";
});
Res = Q;
// Do not record the last subscript corresponding to the size of elements in
// the array.
if (i == Last) {
// Bail out if the remainder is too complex.
if (isa<SCEVAddRecExpr>(R)) {
Subscripts.clear();
Sizes.clear();
return;
}
continue;
}
// Record the access function for the current subscript.
Subscripts.push_back(R);
}
// Also push in last position the remainder of the last division: it will be
// the access function of the innermost dimension.
Subscripts.push_back(Res);
std::reverse(Subscripts.begin(), Subscripts.end());
LLVM_DEBUG({
dbgs() << "Subscripts:\n";
for (const SCEV *S : Subscripts)
dbgs() << *S << "\n";
});
}
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
/// sizes of an array access. Returns the remainder of the delinearization that
/// is the offset start of the array. The SCEV->delinearize algorithm computes
/// the multiples of SCEV coefficients: that is a pattern matching of sub
/// expressions in the stride and base of a SCEV corresponding to the
/// computation of a GCD (greatest common divisor) of base and stride. When
/// SCEV->delinearize fails, it returns the SCEV unchanged.
///
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
///
/// void foo(long n, long m, long o, double A[n][m][o]) {
///
/// for (long i = 0; i < n; i++)
/// for (long j = 0; j < m; j++)
/// for (long k = 0; k < o; k++)
/// A[i][j][k] = 1.0;
/// }
///
/// the delinearization input is the following AddRec SCEV:
///
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
///
/// From this SCEV, we are able to say that the base offset of the access is %A
/// because it appears as an offset that does not divide any of the strides in
/// the loops:
///
/// CHECK: Base offset: %A
///
/// and then SCEV->delinearize determines the size of some of the dimensions of
/// the array as these are the multiples by which the strides are happening:
///
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
///
/// Note that the outermost dimension remains of UnknownSize because there are
/// no strides that would help identifying the size of the last dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
///
/// Finally delinearize provides the access functions for the array reference
/// that does correspond to A[i][j][k] of the above C testcase:
///
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
///
/// The testcases are checking the output of a function pass:
/// DelinearizationPass that walks through all loads and stores of a function
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
void ScalarEvolution::delinearize(const SCEV *Expr,
SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) {
// First step: collect parametric terms.
SmallVector<const SCEV *, 4> Terms;
collectParametricTerms(Expr, Terms);
if (Terms.empty())
return;
// Second step: find subscript sizes.
findArrayDimensions(Terms, Sizes, ElementSize);
if (Sizes.empty())
return;
// Third step: compute the access functions for each subscript.
computeAccessFunctions(Expr, Subscripts, Sizes);
if (Subscripts.empty())
return;
LLVM_DEBUG({
dbgs() << "succeeded to delinearize " << *Expr << "\n";
dbgs() << "ArrayDecl[UnknownSize]";
for (const SCEV *S : Sizes)
dbgs() << "[" << *S << "]";
dbgs() << "\nArrayRef";
for (const SCEV *S : Subscripts)
dbgs() << "[" << *S << "]";
dbgs() << "\n";
});
}
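// A minimal usage sketch (assuming Inst is a load or store inside the loop
// nest rooted at L and Ptr is its pointer operand), roughly what the
// DelinearizationPass mentioned above does:
//
//   const SCEV *AccessFn = SE.getSCEVAtScope(SE.getSCEV(Ptr), L);
//   const SCEV *BasePointer = SE.getPointerBase(AccessFn);
//   // Delinearize the offset relative to the base pointer.
//   AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
//   SmallVector<const SCEV *, 3> Subscripts, Sizes;
//   SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(Inst));
//   // On success both vectors are non-empty: Sizes holds the inner
//   // dimension sizes plus the element size, and Subscripts holds one
//   // access function per dimension.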
bool ScalarEvolution::getIndexExpressionsFromGEP(
const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts,
SmallVectorImpl<int> &Sizes) {
assert(Subscripts.empty() && Sizes.empty() &&
"Expected output lists to be empty on entry to this function.");
assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
Type *Ty = nullptr;
bool DroppedFirstDim = false;
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
const SCEV *Expr = getSCEV(GEP->getOperand(i));
if (i == 1) {
Ty = GEP->getSourceElementType();
if (auto *Const = dyn_cast<SCEVConstant>(Expr))
if (Const->getValue()->isZero()) {
DroppedFirstDim = true;
continue;
}
Subscripts.push_back(Expr);
continue;
}
auto *ArrayTy = dyn_cast<ArrayType>(Ty);
if (!ArrayTy) {
Subscripts.clear();
Sizes.clear();
return false;
}
Subscripts.push_back(Expr);
if (!(DroppedFirstDim && i == 2))
Sizes.push_back(ArrayTy->getNumElements());
Ty = ArrayTy->getElementType();
}
return !Subscripts.empty();
}
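// Illustrative example (a sketch): for a hypothetical
//   %gep = getelementptr [20 x [10 x double]], [20 x [10 x double]]* %A,
//                        i64 0, i64 %i, i64 %j
// the leading zero index is dropped, so Subscripts = {%i, %j} and
// Sizes = {10}; the outermost dimension (20) is deliberately not recorded.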
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
void ScalarEvolution::SCEVCallbackVH::deleted() {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(getValPtr());
// this now dangles!
}
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
Value *Old = getValPtr();
SmallVector<User *, 16> Worklist(Old->users());
SmallPtrSet<User *, 8> Visited;
while (!Worklist.empty()) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
// that until everything else is done.
if (U == Old)
continue;
if (!Visited.insert(U).second)
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(U);
llvm::append_range(Worklist, U->users());
}
// Delete the Old value.
if (PHINode *PN = dyn_cast<PHINode>(Old))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->eraseValueFromMap(Old);
// this now dangles!
}
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
: CallbackVH(V), SE(se) {}
//===----------------------------------------------------------------------===//
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
AssumptionCache &AC, DominatorTree &DT,
LoopInfo &LI)
: F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
LoopDispositions(64), BlockDispositions(64) {
// To use guards for proving predicates, we need to scan every instruction in
// relevant basic blocks, and not just terminators. Doing this is a waste of
// time if the IR does not actually contain any calls to
// @llvm.experimental.guard, so do a quick check and remember this beforehand.
//
// This pessimizes the case where a pass that preserves ScalarEvolution wants
// to _add_ guards to the module when there weren't any before, and wants
// ScalarEvolution to optimize based on those guards. For now we prefer to be
// efficient in lieu of being smart in that rather obscure case.
auto *GuardDecl = F.getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
}
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
: F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
PendingMerges(std::move(Arg.PendingMerges)),
MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
ConstantEvolutionLoopExitValue(
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
UniquePreds(std::move(Arg.UniquePreds)),
SCEVAllocator(std::move(Arg.SCEVAllocator)),
LoopUsers(std::move(Arg.LoopUsers)),
PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
FirstUnknown(Arg.FirstUnknown) {
Arg.FirstUnknown = nullptr;
}
ScalarEvolution::~ScalarEvolution() {
// Iterate through all the SCEVUnknown instances and call their
// destructors, so that they release their references to their values.
for (SCEVUnknown *U = FirstUnknown; U;) {
SCEVUnknown *Tmp = U;
U = U->Next;
Tmp->~SCEVUnknown();
}
FirstUnknown = nullptr;
ExprValueMap.clear();
ValueExprMap.clear();
HasRecMap.clear();
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
assert(PendingPhiRanges.empty() && "getRangeRef garbage");
assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
const Loop *L) {
// Print all inner loops first
for (Loop *I : *L)
PrintLoopInfo(OS, SE, I);
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
if (ExitingBlocks.size() != 1)
OS << "<multiple exits> ";
if (SE->hasLoopInvariantBackedgeTakenCount(L))
OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
else
OS << "Unpredictable backedge-taken count.\n";
if (ExitingBlocks.size() > 1)
for (BasicBlock *ExitingBlock : ExitingBlocks) {
OS << " exit count for " << ExitingBlock->getName() << ": "
<< *SE->getExitCount(L, ExitingBlock) << "\n";
}
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
OS << "\n"
"Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
SCEVUnionPredicate Pred;
auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is " << *PBT << "\n";
OS << " Predicates:\n";
Pred.print(OS, 4);
} else {
OS << "Unpredictable predicated backedge-taken count. ";
}
OS << "\n";
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
}
}
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
switch (LD) {
case ScalarEvolution::LoopVariant:
return "Variant";
case ScalarEvolution::LoopInvariant:
return "Invariant";
case ScalarEvolution::LoopComputable:
return "Computable";
}
llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
}
void ScalarEvolution::print(raw_ostream &OS) const {
// ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
// observable from outside the class though, so casting away the
// const isn't dangerous.
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
if (ClassifyExpressions) {
OS << "Classifying expressions for: ";
F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (Instruction &I : instructions(F))
if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
OS << I << '\n';
OS << " --> ";
const SCEV *SV = SE.getSCEV(&I);
SV->print(OS);
if (!isa<SCEVCouldNotCompute>(SV)) {
OS << " U: ";
SE.getUnsignedRange(SV).print(OS);
OS << " S: ";
SE.getSignedRange(SV).print(OS);
}
const Loop *L = LI.getLoopFor(I.getParent());
const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
if (!isa<SCEVCouldNotCompute>(AtUse)) {
OS << " U: ";
SE.getUnsignedRange(AtUse).print(OS);
OS << " S: ";
SE.getSignedRange(AtUse).print(OS);
}
}
if (L) {
OS << "\t\t" "Exits: ";
const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!SE.isLoopInvariant(ExitValue, L)) {
OS << "<<Unknown>>";
} else {
OS << *ExitValue;
}
bool First = true;
for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
if (First) {
OS << "\t\t" "LoopDispositions: { ";
First = false;
} else {
OS << ", ";
}
Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
}
for (auto *InnerL : depth_first(L)) {
if (InnerL == L)
continue;
if (First) {
OS << "\t\t" "LoopDispositions: { ";
First = false;
} else {
OS << ", ";
}
InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
}
OS << " }";
}
OS << "\n";
}
}
OS << "Determining loop execution counts for: ";
F.printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (Loop *I : LI)
PrintLoopInfo(OS, &SE, I);
}
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
auto &Values = LoopDispositions[S];
for (auto &V : Values) {
if (V.getPointer() == L)
return V.getInt();
}
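// Seed the cache with a conservative placeholder before computing: the
// computation below may recursively insert further entries into
// LoopDispositions and invalidate the reference obtained above, which is
// also why the entry is looked up again once the result is known.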
Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
auto &Values2 = LoopDispositions[S];
for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
if (V.getPointer() == L) {
V.setInt(D);
break;
}
}
return D;
}
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
switch (S->getSCEVType()) {
case scConstant:
return LoopInvariant;
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
// If L is the addrec's loop, it's computable.
if (AR->getLoop() == L)
return LoopComputable;
// Add recurrences are never invariant in the function-body (null loop).
if (!L)
return LoopVariant;
// Everything that is not defined at loop entry is variant.
if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
return LoopVariant;
assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
" dominate the contained loop's header?");
// This recurrence is invariant w.r.t. L if AR's loop contains L.
if (AR->getLoop()->contains(L))
return LoopInvariant;
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
for (auto *Op : AR->operands())
if (!isLoopInvariant(Op, L))
return LoopVariant;
// Otherwise it's loop-invariant.
return LoopInvariant;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
bool HasVarying = false;
for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
LoopDisposition D = getLoopDisposition(Op, L);
if (D == LoopVariant)
return LoopVariant;
if (D == LoopComputable)
HasVarying = true;
}
return HasVarying ? LoopComputable : LoopInvariant;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
if (LD == LoopVariant)
return LoopVariant;
LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
if (RD == LoopVariant)
return LoopVariant;
return (LD == LoopInvariant && RD == LoopInvariant) ?
LoopInvariant : LoopComputable;
}
case scUnknown:
// All non-instruction values are loop invariant. All instructions are loop
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopInvariant;
}
bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
return getLoopDisposition(S, L) == LoopComputable;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
auto &Values = BlockDispositions[S];
for (auto &V : Values) {
if (V.getPointer() == BB)
return V.getInt();
}
Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
auto &Values2 = BlockDispositions[S];
for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
if (V.getPointer() == BB) {
V.setInt(D);
break;
}
}
return D;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
switch (S->getSCEVType()) {
case scConstant:
return ProperlyDominatesBlock;
case scPtrToInt:
case scTruncate:
case scZeroExtend:
case scSignExtend:
return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
case scAddRecExpr: {
// This uses a "dominates" query instead of a "properly dominates" query
// to test for proper dominance too, because the instruction which
// produces the addrec's value is a PHI, and a PHI effectively properly
// dominates its entire containing block.
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
// Fall through into SCEVNAryExpr handling.
LLVM_FALLTHROUGH;
}
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
for (const SCEV *NAryOp : NAry->operands()) {
BlockDisposition D = getBlockDisposition(NAryOp, BB);
if (D == DoesNotDominateBlock)
return DoesNotDominateBlock;
if (D == DominatesBlock)
Proper = false;
}
return Proper ? ProperlyDominatesBlock : DominatesBlock;
}
case scUDivExpr: {
const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
BlockDisposition LD = getBlockDisposition(LHS, BB);
if (LD == DoesNotDominateBlock)
return DoesNotDominateBlock;
BlockDisposition RD = getBlockDisposition(RHS, BB);
if (RD == DoesNotDominateBlock)
return DoesNotDominateBlock;
return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
ProperlyDominatesBlock : DominatesBlock;
}
case scUnknown:
if (Instruction *I =
dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
if (I->getParent() == BB)
return DominatesBlock;
if (DT.properlyDominates(I->getParent(), BB))
return ProperlyDominatesBlock;
return DoesNotDominateBlock;
}
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
llvm_unreachable("Unknown SCEV kind!");
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) >= DominatesBlock;
}
bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
void
ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
ExprValueMap.erase(S);
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
if (Entry.first == S)
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
auto RemoveSCEVFromBackedgeMap =
[S](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
for (auto I = Map.begin(), E = Map.end(); I != E;) {
BackedgeTakenInfo &BEInfo = I->second;
if (BEInfo.hasOperand(S))
Map.erase(I++);
else
++I;
}
};
RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
void
ScalarEvolution::getUsedLoops(const SCEV *S,
SmallPtrSetImpl<const Loop *> &LoopsUsed) {
struct FindUsedLoops {
FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
: LoopsUsed(LoopsUsed) {}
SmallPtrSetImpl<const Loop *> &LoopsUsed;
bool follow(const SCEV *S) {
if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
LoopsUsed.insert(AR->getLoop());
return true;
}
bool isDone() const { return false; }
};
FindUsedLoops F(LoopsUsed);
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
SmallPtrSet<const Loop *, 8> LoopsUsed;
getUsedLoops(S, LoopsUsed);
for (auto *L : LoopsUsed)
LoopUsers[L].push_back(S);
}
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());
// Maps SCEV expressions from one ScalarEvolution "universe" to another.
struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
const SCEV *visitConstant(const SCEVConstant *Constant) {
return SE.getConstant(Constant->getAPInt());
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
return SE.getUnknown(Expr->getValue());
}
const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
return SE.getCouldNotCompute();
}
};
SCEVMapper SCM(SE2);
while (!LoopStack.empty()) {
auto *L = LoopStack.pop_back_val();
llvm::append_range(LoopStack, *L);
auto *CurBECount = SCM.visit(
const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
auto *NewBECount = SE2.getBackedgeTakenCount(L);
if (CurBECount == SE2.getCouldNotCompute() ||
NewBECount == SE2.getCouldNotCompute()) {
// NB! This situation is legal, but is very suspicious -- whatever pass
// changed the loop to make a trip count go from could not compute to
// computable or vice-versa *should have* invalidated SCEV. However, we
// choose not to assert here (for now) since we don't want false
// positives.
continue;
}
if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
// SCEV treats "undef" as an unknown but consistent value (i.e. it does
// not propagate undef aggressively). This means we can (and do) fail
// verification in cases where a transform makes the trip count of a loop
// go from "undef" to "undef+1" (say). The transform is fine, since in
// both cases the loop iterates "undef" times, but SCEV thinks we
// increased the trip count of the loop by 1 incorrectly.
continue;
}
if (SE.getTypeSizeInBits(CurBECount->getType()) >
SE.getTypeSizeInBits(NewBECount->getType()))
NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
else if (SE.getTypeSizeInBits(CurBECount->getType()) <
SE.getTypeSizeInBits(NewBECount->getType()))
CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
// Unless VerifySCEVStrict is set, we only compare constant deltas.
if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
dbgs() << "Trip Count for " << *L << " Changed!\n";
dbgs() << "Old: " << *CurBECount << "\n";
dbgs() << "New: " << *NewBECount << "\n";
dbgs() << "Delta: " << *Delta << "\n";
std::abort();
}
}
// Collect all valid loops currently in LoopInfo.
SmallPtrSet<Loop *, 32> ValidLoops;
SmallVector<Loop *, 32> Worklist(LI.begin(), LI.end());
while (!Worklist.empty()) {
Loop *L = Worklist.pop_back_val();
if (ValidLoops.contains(L))
continue;
ValidLoops.insert(L);
Worklist.append(L->begin(), L->end());
}
// Check for SCEV expressions referencing invalid/deleted loops.
for (auto &KV : ValueExprMap) {
auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
if (!AR)
continue;
assert(ValidLoops.contains(AR->getLoop()) &&
"AddRec references invalid loop");
}
}
bool ScalarEvolution::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Invalidate the ScalarEvolution object whenever it isn't preserved or one
// of its dependencies is invalidated.
auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}
AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<LoopAnalysis>(F));
}
PreservedAnalyses
ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).verify();
return PreservedAnalyses::all();
}
PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
// For compatibility with opt's -analyze feature under legacy pass manager
// which was not ported to NPM. This keeps tests using
// update_analyze_test_checks.py working.
OS << "Printing analysis 'Scalar Evolution Analysis' for function '"
<< F.getName() << "':\n";
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolutionWrapperPass::ID = 0;
ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
SE.reset(new ScalarEvolution(
F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
return false;
}
void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
SE->print(OS);
}
void ScalarEvolutionWrapperPass::verifyAnalysis() const {
if (!VerifySCEV)
return;
SE->verify();
}
void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AssumptionCacheTracker>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
const SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS,
const SCEV *RHS) {
FoldingSetNodeID ID;
assert(LHS->getType() == RHS->getType() &&
"Type mismatch between LHS and RHS");
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Equal);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
void *IP = nullptr;
if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
return S;
SCEVEqualPredicate *Eq = new (SCEVAllocator)
SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
UniquePreds.InsertNode(Eq, IP);
return Eq;
}
const SCEVPredicate *ScalarEvolution::getWrapPredicate(
const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
FoldingSetNodeID ID;
// Unique this node based on the arguments
ID.AddInteger(SCEVPredicate::P_Wrap);
ID.AddPointer(AR);
ID.AddInteger(AddedFlags);
void *IP = nullptr;
if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
return S;
auto *OF = new (SCEVAllocator)
SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags);
UniquePreds.InsertNode(OF, IP);
return OF;
}
namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
/// Rewrites \p S in the context of a loop L and the SCEV predication
/// infrastructure.
///
/// If \p Pred is non-null, the SCEV expression is rewritten to respect the
/// equivalences present in \p Pred.
///
/// If \p NewPreds is non-null, rewrite is free to add further predicates to
/// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
SCEVUnionPredicate *Pred) {
SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (Pred) {
auto ExprPreds = Pred->getPredicatesForExpr(Expr);
for (auto *Pred : ExprPreds)
if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
if (IPred->getLHS() == Expr)
return IPred->getRHS();
}
return convertToAddRecWithPreds(Expr);
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
if (AR && AR->getLoop() == L && AR->isAffine()) {
// This couldn't be folded because the operand didn't have the nuw
// flag. Add the nusw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
Type *Ty = Expr->getType();
if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW))
return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty),
SE.getSignExtendExpr(Step, Ty), L,
AR->getNoWrapFlags());
}
return SE.getZeroExtendExpr(Operand, Expr->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
const SCEV *Operand = visit(Expr->getOperand());
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand);
if (AR && AR->getLoop() == L && AR->isAffine()) {
// This couldn't be folded because the operand didn't have the nsw
// flag. Add the nssw flag as an assumption that we could make.
const SCEV *Step = AR->getStepRecurrence(SE);
Type *Ty = Expr->getType();
if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW))
return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty),
SE.getSignExtendExpr(Step, Ty), L,
AR->getNoWrapFlags());
}
return SE.getSignExtendExpr(Operand, Expr->getType());
}
private:
explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
SCEVUnionPredicate *Pred)
: SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
bool addOverflowAssumption(const SCEVPredicate *P) {
if (!NewPreds) {
// Check if we've already made this assumption.
return Pred && Pred->implies(P);
}
NewPreds->insert(P);
return true;
}
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
auto *A = SE.getWrapPredicate(AR, AddedFlags);
return addOverflowAssumption(A);
}
// If \p Expr represents a PHINode, we try to see if it can be represented
// as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible
// to add this predicate as a runtime overflow check, we return the AddRec.
// If \p Expr does not meet these conditions (is not a PHI node, or we
// couldn't create an AddRec for it, or couldn't add the predicate), we just
// return \p Expr.
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
if (!isa<PHINode>(Expr->getValue()))
return Expr;
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
if (!PredicatedRewrite)
return Expr;
for (auto *P : PredicatedRewrite->second){
// Wrap predicates from outer loops are not supported.
if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr());
if (L != AR->getLoop())
return Expr;
}
if (!addOverflowAssumption(P))
return Expr;
}
return PredicatedRewrite->first;
}
SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
SCEVUnionPredicate *Pred;
const Loop *L;
};
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
SCEVUnionPredicate &Preds) {
return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
const SCEV *S, const Loop *L,
SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
if (!AddRec)
return nullptr;
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
for (auto *P : TransformPreds)
Preds.insert(P);
return AddRec;
}
/// SCEV predicates
SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
SCEVPredicateKind Kind)
: FastID(ID), Kind(Kind) {}
SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
const SCEV *LHS, const SCEV *RHS)
: SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {
assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match");
assert(LHS != RHS && "LHS and RHS are the same SCEV");
}
bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
if (!Op)
return false;
return Op->LHS == LHS && Op->RHS == RHS;
}
bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
}
SCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
const SCEVAddRecExpr *AR,
IncrementWrapFlags Flags)
: SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {}
const SCEV *SCEVWrapPredicate::getExpr() const { return AR; }
bool SCEVWrapPredicate::implies(const SCEVPredicate *N) const {
const auto *Op = dyn_cast<SCEVWrapPredicate>(N);
return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags;
}
bool SCEVWrapPredicate::isAlwaysTrue() const {
SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags();
IncrementWrapFlags IFlags = Flags;
if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags)
IFlags = clearFlags(IFlags, IncrementNSSW);
return IFlags == IncrementAnyWrap;
}
void SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << *getExpr() << " Added Flags: ";
if (SCEVWrapPredicate::IncrementNUSW & getFlags())
OS << "<nusw>";
if (SCEVWrapPredicate::IncrementNSSW & getFlags())
OS << "<nssw>";
OS << "\n";
}
SCEVWrapPredicate::IncrementWrapFlags
SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
ScalarEvolution &SE) {
IncrementWrapFlags ImpliedFlags = IncrementAnyWrap;
SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags();
// We can safely transfer the NSW flag as NSSW.
if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags)
ImpliedFlags = IncrementNSSW;
if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) {
// If the increment is positive, the SCEV NUW flag will also imply the
// WrapPredicate NUSW flag.
if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE)))
if (Step->getValue()->getValue().isNonNegative())
ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW);
}
return ImpliedFlags;
}
/// Union predicates don't get cached, so create a dummy set ID for them.
SCEVUnionPredicate::SCEVUnionPredicate()
: SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
bool SCEVUnionPredicate::isAlwaysTrue() const {
return all_of(Preds,
[](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
}
ArrayRef<const SCEVPredicate *>
SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
auto I = SCEVToPreds.find(Expr);
if (I == SCEVToPreds.end())
return ArrayRef<const SCEVPredicate *>();
return I->second;
}
bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
return all_of(Set->Preds,
[this](const SCEVPredicate *I) { return this->implies(I); });
auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
if (ScevPredsIt == SCEVToPreds.end())
return false;
auto &SCEVPreds = ScevPredsIt->second;
return any_of(SCEVPreds,
[N](const SCEVPredicate *I) { return I->implies(N); });
}
const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
for (auto Pred : Preds)
Pred->print(OS, Depth);
}
void SCEVUnionPredicate::add(const SCEVPredicate *N) {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
for (auto Pred : Set->Preds)
add(Pred);
return;
}
if (implies(N))
return;
const SCEV *Key = N->getExpr();
assert(Key && "Only SCEVUnionPredicate doesn't have an"
" associated expression!");
SCEVToPreds[Key].push_back(N);
Preds.push_back(N);
}
PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
: SE(SE), L(L) {}
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
RewriteEntry &Entry = RewriteMap[Expr];
// If we already have an entry and the version matches, return it.
if (Entry.second && Generation == Entry.first)
return Entry.second;
// We found an entry but it's stale. Rewrite the stale entry
// according to the current predicate.
if (Entry.second)
Expr = Entry.second;
const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds);
Entry = {Generation, NewSCEV};
return NewSCEV;
}
const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
if (!BackedgeCount) {
SCEVUnionPredicate BackedgePred;
BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
addPredicate(BackedgePred);
}
return BackedgeCount;
}
void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
if (Preds.implies(&Pred))
return;
Preds.add(&Pred);
updateGeneration();
}
const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
return Preds;
}
void PredicatedScalarEvolution::updateGeneration() {
// If the generation number wrapped, recompute everything.
if (++Generation == 0) {
for (auto &II : RewriteMap) {
const SCEV *Rewritten = II.second.second;
II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)};
}
}
}
void PredicatedScalarEvolution::setNoOverflow(
Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
const SCEV *Expr = getSCEV(V);
const auto *AR = cast<SCEVAddRecExpr>(Expr);
auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE);
// Clear the statically implied flags.
Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags);
addPredicate(*SE.getWrapPredicate(AR, Flags));
auto II = FlagsMap.insert({V, Flags});
if (!II.second)
II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second);
}
bool PredicatedScalarEvolution::hasNoOverflow(
Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) {
const SCEV *Expr = getSCEV(V);
const auto *AR = cast<SCEVAddRecExpr>(Expr);
Flags = SCEVWrapPredicate::clearFlags(
Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE));
auto II = FlagsMap.find(V);
if (II != FlagsMap.end())
Flags = SCEVWrapPredicate::clearFlags(Flags, II->second);
return Flags == SCEVWrapPredicate::IncrementAnyWrap;
}
const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
const SCEV *Expr = this->getSCEV(V);
SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
if (!New)
return nullptr;
for (auto *P : NewPreds)
Preds.add(P);
updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
}
PredicatedScalarEvolution::PredicatedScalarEvolution(
const PredicatedScalarEvolution &Init)
: RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
for (auto I : Init.FlagsMap)
FlagsMap.insert(I);
}
void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const {
// For each block.
for (auto *BB : L.getBlocks())
for (auto &I : *BB) {
if (!SE.isSCEVable(I.getType()))
continue;
auto *Expr = SE.getSCEV(&I);
auto II = RewriteMap.find(Expr);
if (II == RewriteMap.end())
continue;
// Don't print things that are not interesting.
if (II->second.second == Expr)
continue;
OS.indent(Depth) << "[PSE]" << I << ":\n";
OS.indent(Depth + 2) << *Expr << "\n";
OS.indent(Depth + 2) << "--> " << *II->second.second << "\n";
}
}
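// A minimal usage sketch (assuming a loop L and a value V whose evolution is
// an add-recurrence only under extra run-time no-wrap assumptions), similar
// to how loop transformations drive this interface:
//
//   PredicatedScalarEvolution PSE(SE, *L);
//   if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(V)) {
//     // AR is valid only under the accumulated assumptions; emit them as
//     // run-time checks (e.g. with SCEVExpander) before relying on AR.
//     const SCEVUnionPredicate &Preds = PSE.getUnionPredicate();
//   }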
// Match the mathematical pattern A - (A / B) * B, where A and B can be
// arbitrary expressions. Also match zext (trunc A to iB) to iY, which is used
// for URem with constant power-of-2 second operands.
// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is
// 4, A / B becomes X / 8).
bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,
const SCEV *&RHS) {
// Try to match 'zext (trunc A to iB) to iY', which is used
// for URem with constant power-of-2 second operands. Make sure the size of
// the operand A matches the size of the whole expression.
if (const auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Expr))
if (const auto *Trunc = dyn_cast<SCEVTruncateExpr>(ZExt->getOperand(0))) {
LHS = Trunc->getOperand();
// Bail out if the type of the LHS is larger than the type of the
// expression for now.
if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(Expr->getType()))
return false;
if (LHS->getType() != Expr->getType())
LHS = getZeroExtendExpr(LHS, Expr->getType());
RHS = getConstant(APInt(getTypeSizeInBits(Expr->getType()), 1)
<< getTypeSizeInBits(Trunc->getType()));
return true;
}
const auto *Add = dyn_cast<SCEVAddExpr>(Expr);
if (Add == nullptr || Add->getNumOperands() != 2)
return false;
const SCEV *A = Add->getOperand(1);
const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0));
if (Mul == nullptr)
return false;
const auto MatchURemWithDivisor = [&](const SCEV *B) {
// (SomeExpr + (-(SomeExpr / B) * B)).
if (Expr == getURemExpr(A, B)) {
LHS = A;
RHS = B;
return true;
}
return false;
};
// (SomeExpr + (-1 * (SomeExpr / B) * B)).
if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0)))
return MatchURemWithDivisor(Mul->getOperand(1)) ||
MatchURemWithDivisor(Mul->getOperand(2));
// (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)).
if (Mul->getNumOperands() == 2)
return MatchURemWithDivisor(Mul->getOperand(1)) ||
MatchURemWithDivisor(Mul->getOperand(0)) ||
MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) ||
MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0)));
return false;
}
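// For instance, %a urem %b is typically expressed by SCEV as
//   (%a + (-1 * (%a /u %b) * %b))
// and matches with LHS = %a, RHS = %b, while the power-of-two form
//   zext (trunc %a to i3) to i32
// (i.e. %a urem 8 for an i32 %a) matches with LHS = %a and RHS = 8. The
// names %a and %b are for illustration only.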
const SCEV *
ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// Form an expression for the maximum exit count possible for this loop. We
// merge the max and exact information to approximate a version of
// getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
SmallVector<const SCEV*, 4> ExitCounts;
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = getExitCount(L, ExitingBB);
if (isa<SCEVCouldNotCompute>(ExitCount))
ExitCount = getExitCount(L, ExitingBB,
ScalarEvolution::ConstantMaximum);
if (!isa<SCEVCouldNotCompute>(ExitCount)) {
assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
"We should only have known counts for exiting blocks that "
"dominate latch!");
ExitCounts.push_back(ExitCount);
}
}
if (ExitCounts.empty())
return getCouldNotCompute();
return getUMinFromMismatchedTypes(ExitCounts);
}
/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown
/// components following the Map (Value -> SCEV)), but skips AddRecExpr because
/// we cannot guarantee that the replacement is loop invariant in the loop of
/// the AddRec.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
ValueToSCEVMapTy &Map;
public:
SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
: SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
auto I = Map.find(Expr->getValue());
if (I == Map.end())
return Expr;
return I->second;
}
};
const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) {
// If we have LHS == 0, check if LHS is computing a property of some unknown
// SCEV %v that lets us rewrite %v to express that property explicitly.
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (Predicate == CmpInst::ICMP_EQ && RHSC &&
RHSC->getValue()->isNullValue()) {
// If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
// explicitly express that.
const SCEV *URemLHS = nullptr;
const SCEV *URemRHS = nullptr;
if (matchURem(LHS, URemLHS, URemRHS)) {
if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
Value *V = LHSUnknown->getValue();
auto Multiple =
getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS,
(SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
RewriteMap[V] = Multiple;
return;
}
}
}
if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
std::swap(LHS, RHS);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
// Check for a condition of the form (-C1 + X < C2). InstCombine will
// create this form when combining two checks of the form (X u< C2 + C1) and
// (X >=u C1).
auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
if (!AddExpr || AddExpr->getNumOperands() != 2)
return false;
auto *C1 = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
auto *LHSUnknown = dyn_cast<SCEVUnknown>(AddExpr->getOperand(1));
auto *C2 = dyn_cast<SCEVConstant>(RHS);
if (!C1 || !C2 || !LHSUnknown)
return false;
auto ExactRegion =
ConstantRange::makeExactICmpRegion(Predicate, C2->getAPInt())
.sub(C1->getAPInt());
// Bail out, unless we have a non-wrapping, monotonic range.
if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
return false;
auto I = RewriteMap.find(LHSUnknown->getValue());
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
getConstant(ExactRegion.getUnsignedMin()),
getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
return true;
};
if (MatchRangeCheckIdiom())
return;
// For now, limit to conditions that provide information about unknown
// expressions. RHS also cannot contain add recurrences.
auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
if (!LHSUnknown || containsAddRecurrence(RHS))
return;
// Check whether LHS has already been rewritten. In that case we want to
// chain further rewrites onto the already rewritten value.
auto I = RewriteMap.find(LHSUnknown->getValue());
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
const SCEV *RewrittenRHS = nullptr;
switch (Predicate) {
case CmpInst::ICMP_ULT:
RewrittenRHS =
getUMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_SLT:
RewrittenRHS =
getSMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_ULE:
RewrittenRHS = getUMinExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_SLE:
RewrittenRHS = getSMinExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_UGT:
RewrittenRHS =
getUMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_SGT:
RewrittenRHS =
getSMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
break;
case CmpInst::ICMP_UGE:
RewrittenRHS = getUMaxExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_SGE:
RewrittenRHS = getSMaxExpr(RewrittenLHS, RHS);
break;
case CmpInst::ICMP_EQ:
if (isa<SCEVConstant>(RHS))
RewrittenRHS = RHS;
break;
case CmpInst::ICMP_NE:
if (isa<SCEVConstant>(RHS) &&
cast<SCEVConstant>(RHS)->getValue()->isNullValue())
RewrittenRHS = getUMaxExpr(RewrittenLHS, getOne(RHS->getType()));
break;
default:
break;
}
if (RewrittenRHS)
RewriteMap[LHSUnknown->getValue()] = RewrittenRHS;
};
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
ValueToSCEVMapTy RewriteMap;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;
bool EnterIfTrue = LoopEntryPredicate->getSuccessor(0) == Pair.second;
SmallVector<Value *, 8> Worklist;
SmallPtrSet<Value *, 8> Visited;
Worklist.push_back(LoopEntryPredicate->getCondition());
while (!Worklist.empty()) {
Value *Cond = Worklist.pop_back_val();
if (!Visited.insert(Cond).second)
continue;
if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
auto Predicate =
EnterIfTrue ? Cmp->getPredicate() : Cmp->getInversePredicate();
CollectCondition(Predicate, getSCEV(Cmp->getOperand(0)),
getSCEV(Cmp->getOperand(1)), RewriteMap);
continue;
}
Value *L, *R;
if (EnterIfTrue ? match(Cond, m_LogicalAnd(m_Value(L), m_Value(R)))
: match(Cond, m_LogicalOr(m_Value(L), m_Value(R)))) {
Worklist.push_back(L);
Worklist.push_back(R);
}
}
}
// Also collect information from assumptions dominating the loop.
for (auto &AssumeVH : AC.assumptions()) {
if (!AssumeVH)
continue;
auto *AssumeI = cast<CallInst>(AssumeVH);
auto *Cmp = dyn_cast<ICmpInst>(AssumeI->getOperand(0));
if (!Cmp || !DT.dominates(AssumeI, L->getHeader()))
continue;
CollectCondition(Cmp->getPredicate(), getSCEV(Cmp->getOperand(0)),
getSCEV(Cmp->getOperand(1)), RewriteMap);
}
if (RewriteMap.empty())
return Expr;
SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
return Rewriter.visit(Expr);
}
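// Illustrative example (a sketch, %n is for illustration only): if the loop
// is only entered when the guard "%n u< 8" holds, the rewrite map records
// %n -> umin(%n, 7), so applying the guards to an expression such as
// (%n + 1) yields (umin(%n, 7) + 1), which later range and trip-count
// queries can exploit.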
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
index 65828898d392..9053acce60c4 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1,1164 +1,1163 @@
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "tti"
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::Hidden,
cl::desc("Recognize reduction patterns."));
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
explicit NoTTIImpl(const DataLayout &DL)
: TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace
bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
// If the loop has irreducible control flow, it cannot be converted to a
// hardware loop.
LoopBlocksRPO RPOT(L);
RPOT.perform(&LI);
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
return false;
return true;
}
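// Build intrinsic cost attributes from an actual call site: record the
// call's arguments, the callee's declared parameter types, and any
// fast-math flags attached to the call.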
IntrinsicCostAttributes::IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost)
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
ScalarizationCost(ScalarizationCost) {
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<const Value *> Args)
: RetTy(Ty), IID(Id) {
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());
}
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args,
ArrayRef<Type *> Tys,
FastMathFlags Flags,
const IntrinsicInst *I,
InstructionCost ScalarCost)
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
}
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
LoopInfo &LI, DominatorTree &DT,
bool ForceNestedLoop,
bool ForceHardwareLoopPHI) {
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *BB : ExitingBlocks) {
// If we pass the updated counter back through a phi, we need to know
// which latch the updated value will be coming from.
if (!L->isLoopLatch(BB)) {
if (ForceHardwareLoopPHI || CounterInReg)
continue;
}
const SCEV *EC = SE.getExitCount(L, BB);
if (isa<SCEVCouldNotCompute>(EC))
continue;
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
if (ConstEC->getValue()->isZero())
continue;
} else if (!SE.isLoopInvariant(EC, L))
continue;
if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
continue;
// If this exiting block is contained in a nested loop, it is not eligible
// for insertion of the branch-and-decrement since the inner loop would
// end up messing up the value in the CTR.
if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
continue;
// We now have a loop-invariant count of loop iterations (which is not the
// constant zero) for which we know that this loop will not exit via this
// exiting block.
// We need to make sure that this block will run on every loop iteration.
// For this to be true, we must dominate all blocks with backedges. Such
// blocks are in-loop predecessors to the header block.
bool NotAlways = false;
for (BasicBlock *Pred : predecessors(L->getHeader())) {
if (!L->contains(Pred))
continue;
if (!DT.dominates(BB, Pred)) {
NotAlways = true;
break;
}
}
if (NotAlways)
continue;
// Make sure this block ends with a conditional branch.
Instruction *TI = BB->getTerminator();
if (!TI)
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional())
continue;
ExitBranch = BI;
} else
continue;
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
ExitBlock = BB;
ExitCount = EC;
-
break;
}
if (!ExitBlock)
return false;
return true;
}
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
TargetTransformInfo::~TargetTransformInfo() {}
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
: TTIImpl(std::move(Arg.TTIImpl)) {}
TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
TTIImpl = std::move(RHS.TTIImpl);
return *this;
}
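// The methods below are thin wrappers that forward to the type-erased
// TTIImpl object; most wrappers returning an InstructionCost additionally
// assert that the target did not report a negative cost.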
unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
unsigned
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
}
int TargetTransformInfo::getInlinerVectorBonusPercent() const {
return TTIImpl->getInlinerVectorBonusPercent();
}
InstructionCost
TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) const {
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const {
return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
InstructionCost
TargetTransformInfo::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
enum TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getUserCost(U, Operands, CostKind);
assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) &&
"TTI should not produce negative costs!");
return Cost;
}
BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
return TTIImpl->getPredictableBranchThreshold();
}
bool TargetTransformInfo::hasBranchDivergence() const {
return TTIImpl->hasBranchDivergence();
}
bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
return TTIImpl->useGPUDivergenceAnalysis();
}
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
return TTIImpl->isAlwaysUniform(V);
}
unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
bool TargetTransformInfo::collectFlatAddressOperands(
SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
}
bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
unsigned ToAS) const {
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
bool TargetTransformInfo::isHardwareLoopProfitable(
Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
bool TargetTransformInfo::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT,
const LoopAccessInfo *LAI) const {
return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
}
bool TargetTransformInfo::emitGetActiveLaneMask() const {
return TTIImpl->emitGetActiveLaneMask();
}
Optional<Instruction *>
TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
return TTIImpl->instCombineIntrinsic(IC, II);
}
Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const {
return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
KnownBitsComputed);
}
Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const {
return TTIImpl->simplifyDemandedVectorEltsIntrinsic(
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
}
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const {
return TTIImpl->getPeelingPreferences(L, SE, PP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
return TTIImpl->isLegalICmpImmediate(Imm);
}
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
Instruction *I) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace, I);
}
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return TTIImpl->isLSRCostLess(C1, C2);
}
bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
return TTIImpl->isNumRegsMajorCostOfLSR();
}
bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
return TTIImpl->isProfitableLSRChainElement(I);
}
bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();
}
bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
ScalarEvolution *SE, LoopInfo *LI,
DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const {
return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
TTI::AddressingModeKind
TargetTransformInfo::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
return TTIImpl->getPreferredAddressingMode(L, SE);
}
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalNTStore(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedGather(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
}
bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
return TTIImpl->isLegalMaskedCompressStore(DataType);
}
bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);
}
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
unsigned AddrSpace) const {
return TTIImpl->hasVolatileVariant(I, AddrSpace);
}
bool TargetTransformInfo::prefersVectorizedAddressing() const {
return TTIImpl->prefersVectorizedAddressing();
}
InstructionCost TargetTransformInfo::getScalingFactorCost(
Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) const {
InstructionCost Cost = TTIImpl->getScalingFactorCost(
Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
bool TargetTransformInfo::LSRWithInstrQueries() const {
return TTIImpl->LSRWithInstrQueries();
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return TTIImpl->isTruncateFree(Ty1, Ty2);
}
bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
return TTIImpl->isProfitableToHoist(I);
}
bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
InstructionCost TargetTransformInfo::getRegUsageForType(Type *Ty) const {
return TTIImpl->getRegUsageForType(Ty);
}
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(
Constant *C) const {
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
bool TargetTransformInfo::shouldBuildRelLookupTables() const {
return TTIImpl->shouldBuildRelLookupTables();
}
bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}
InstructionCost
TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract) const {
return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(
ArrayRef<const Value *> Args, ArrayRef<Type *> Tys) const {
return TTIImpl->getOperandsScalarizationOverhead(Args, Tys);
}
bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
return TTIImpl->supportsEfficientVectorElementLoadStore();
}
bool TargetTransformInfo::enableAggressiveInterleaving(
bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
TargetTransformInfo::MemCmpExpansionOptions
TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
return TTIImpl->enableMaskedInterleavedAccessVectorization();
}
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) const {
return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
AddressSpace, Alignment, Fast);
}
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
return TTIImpl->haveFastSqrt(Ty);
}
bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}
InstructionCost TargetTransformInfo::getFPOpCost(Type *Ty) const {
InstructionCost Cost = TTIImpl->getFPOpCost(Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode,
unsigned Idx,
const APInt &Imm,
Type *Ty) const {
InstructionCost Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getIntImmCostInst(
unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind, Instruction *Inst) const {
InstructionCost Cost =
TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
return TTIImpl->getNumberOfRegisters(ClassID);
}
unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
Type *Ty) const {
return TTIImpl->getRegisterClassForType(Vector, Ty);
}
const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
return TTIImpl->getRegisterClassName(ClassID);
}
TypeSize TargetTransformInfo::getRegisterBitWidth(
TargetTransformInfo::RegisterKind K) const {
return TTIImpl->getRegisterBitWidth(K);
}
unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
return TTIImpl->getMinVectorRegisterBitWidth();
}
Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
return TTIImpl->getMaxVScale();
}
bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
return TTIImpl->shouldMaximizeVectorBandwidth();
}
ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
bool IsScalable) const {
return TTIImpl->getMinimumVF(ElemWidth, IsScalable);
}
unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
unsigned Opcode) const {
return TTIImpl->getMaximumVF(ElemWidth, Opcode);
}
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
return TTIImpl->shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
llvm::Optional<unsigned>
TargetTransformInfo::getCacheSize(CacheLevel Level) const {
return TTIImpl->getCacheSize(Level);
}
llvm::Optional<unsigned>
TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
return TTIImpl->getCacheAssociativity(Level);
}
unsigned TargetTransformInfo::getPrefetchDistance() const {
return TTIImpl->getPrefetchDistance();
}
unsigned TargetTransformInfo::getMinPrefetchStride(
unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
unsigned NumPrefetches, bool HasCall) const {
return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
NumPrefetches, HasCall);
}
unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
return TTIImpl->getMaxPrefetchIterationsAhead();
}
bool TargetTransformInfo::enableWritePrefetching() const {
return TTIImpl->enableWritePrefetching();
}
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(const Value *V,
OperandValueProperties &OpProps) {
OperandValueKind OpInfo = OK_AnyValue;
OpProps = OP_None;
if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().isPowerOf2())
OpProps = OP_PowerOf2;
return OK_UniformConstantValue;
}
// A broadcast shuffle creates a uniform value.
// TODO: Add support for non-zero index broadcasts.
// TODO: Add support for different source vector width.
if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
if (ShuffleInst->isZeroEltSplat())
OpInfo = OK_UniformValue;
const Value *Splat = getSplatValue(V);
// Check for a splat of a constant or for a non-uniform vector of constants
// and check if the constant(s) are all powers of two.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = OK_NonUniformConstantValue;
if (Splat) {
OpInfo = OK_UniformConstantValue;
if (auto *CI = dyn_cast<ConstantInt>(Splat))
if (CI->getValue().isPowerOf2())
OpProps = OP_PowerOf2;
} else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
OpProps = OP_PowerOf2;
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
if (CI->getValue().isPowerOf2())
continue;
OpProps = OP_None;
break;
}
}
}
// Check for a splat of a uniform value. This is not loop aware, so return
// true only for the obviously uniform cases (argument, globalvalue).
if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
OpInfo = OK_UniformValue;
return OpInfo;
}
InstructionCost TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueKind Opd1Info, OperandValueKind Opd2Info,
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost =
TTIImpl->getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getShuffleCost(ShuffleKind Kind,
VectorType *Ty,
ArrayRef<int> Mask,
int Index,
VectorType *SubTp) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
TTI::CastContextHint
TargetTransformInfo::getCastContextHint(const Instruction *I) {
if (!I)
return CastContextHint::None;
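// Helper that classifies the memory operation feeding (or consuming) the
// cast as a plain load/store, a masked load/store intrinsic, or a
// gather/scatter intrinsic, each of which maps to a different context hint.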
auto getLoadStoreKind = [](const Value *V, unsigned LdStOp, unsigned MaskedOp,
unsigned GatScatOp) {
const Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return CastContextHint::None;
if (I->getOpcode() == LdStOp)
return CastContextHint::Normal;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == MaskedOp)
return TTI::CastContextHint::Masked;
if (II->getIntrinsicID() == GatScatOp)
return TTI::CastContextHint::GatherScatter;
}
return TTI::CastContextHint::None;
};
switch (I->getOpcode()) {
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPExt:
return getLoadStoreKind(I->getOperand(0), Instruction::Load,
Intrinsic::masked_load, Intrinsic::masked_gather);
case Instruction::Trunc:
case Instruction::FPTrunc:
if (I->hasOneUse())
return getLoadStoreKind(*I->user_begin(), Instruction::Store,
Intrinsic::masked_store,
Intrinsic::masked_scatter);
break;
default:
return CastContextHint::None;
}
return TTI::CastContextHint::None;
}
InstructionCost TargetTransformInfo::getCastInstrCost(
unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost =
TTIImpl->getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getExtractWithExtendCost(
unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const {
InstructionCost Cost =
TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getCFInstrCost(
unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost = TTIImpl->getCFInstrCost(Opcode, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost =
TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
Type *Val,
unsigned Index) const {
InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {
assert((I == nullptr || I->getOpcode() == Opcode) &&
"Opcode should reflect passed instruction.");
InstructionCost Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) const {
InstructionCost Cost = TTIImpl->getInterleavedMemoryOpCost(
Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost
TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
InstructionCost
TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
const SCEV *Ptr) const {
InstructionCost Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
InstructionCost Cost = TTIImpl->getMemcpyCost(I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getExtendedAddReductionCost(
bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind) const {
return TTIImpl->getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
CostKind);
}
InstructionCost
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const {
return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}
unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}
Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
IntrinsicInst *Inst, Type *ExpectedType) const {
return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign);
}
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const {
TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
SrcAlign, DestAlign);
}
bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
return TTIImpl->areInlineCompatible(Caller, Callee);
}
bool TargetTransformInfo::areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const {
return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}
bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}
unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}
bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
return TTIImpl->isLegalToVectorizeLoad(LI);
}
bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
return TTIImpl->isLegalToVectorizeStore(SI);
}
bool TargetTransformInfo::isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool TargetTransformInfo::isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
bool TargetTransformInfo::isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc, ElementCount VF) const {
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
}
bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
return TTIImpl->isElementTypeLegalForScalableVector(Ty);
}
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
bool TargetTransformInfo::preferInLoopReduction(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const {
return TTIImpl->preferInLoopReduction(Opcode, Ty, Flags);
}
bool TargetTransformInfo::preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty, ReductionFlags Flags) const {
return TTIImpl->preferPredicatedReductionSelect(Opcode, Ty, Flags);
}
TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
}
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
return TTIImpl->getGISelRematGlobalCost();
}
bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
bool TargetTransformInfo::hasActiveVectorLength() const {
return TTIImpl->hasActiveVectorLength();
}
InstructionCost
TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
InstructionCost
TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
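// For the opcodes below, throughput is computed via getUserCost with the
// reciprocal-throughput cost kind; any other opcode reports -1 (unknown).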
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
case Instruction::Ret:
case Instruction::PHI:
case Instruction::Br:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FNeg:
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::Store:
case Instruction::Load:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::ExtractElement:
case Instruction::InsertElement:
case Instruction::ExtractValue:
case Instruction::ShuffleVector:
case Instruction::Call:
case Instruction::Switch:
return getUserCost(I, CostKind);
default:
// We don't have any information on this instruction.
return -1;
}
}
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
TargetIRAnalysis::TargetIRAnalysis(
std::function<Result(const Function &)> TTICallback)
: TTICallback(std::move(TTICallback)) {}
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
FunctionAnalysisManager &) {
return TTICallback(F);
}
AnalysisKey TargetIRAnalysis::Key;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
}
// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
"Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;
void TargetTransformInfoWrapperPass::anchor() {}
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
: ImmutablePass(ID) {
initializeTargetTransformInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
TargetIRAnalysis TIRA)
: ImmutablePass(ID), TIRA(std::move(TIRA)) {
initializeTargetTransformInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
}
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
FunctionAnalysisManager DummyFAM;
TTI = TIRA.run(F, DummyFAM);
return *TTI;
}
ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index faa14dca1c3f..7edc44c48bbd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,1560 +1,1561 @@
//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#include "DwarfCompileUnit.h"
#include "AddressPool.h"
#include "DwarfExpression.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <iterator>
#include <string>
#include <utility>
using namespace llvm;
static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
// According to DWARF Debugging Information Format Version 5,
// 3.1.2 Skeleton Compilation Unit Entries:
// "When generating a split DWARF object file (see Section 7.3.2
// on page 187), the compilation unit in the .debug_info section
// is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit"
if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton)
return dwarf::DW_TAG_skeleton_unit;
return dwarf::DW_TAG_compile_unit;
}
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU, UnitKind Kind)
: DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
// Don't use the address pool in non-fission or in the skeleton unit itself.
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
const MCSymbol *Base = nullptr;
if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
Base = DD->getSectionLabel(&Label->getSection());
if (!Base || Base == Label) {
unsigned idx = DD->getAddressPool().getIndex(Label);
addAttribute(Die, Attribute,
DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx
: dwarf::DW_FORM_GNU_addr_index,
DIEInteger(idx));
return;
}
// Could be extended to work with DWARFv4 Split DWARF if that's important for
// someone. In that case DW_FORM_data would be used.
assert(DD->getDwarfVersion() >= 5 &&
"Addr+offset expressions are only valuable when using debug_addr (to "
"reduce relocations) available in DWARFv5 or higher");
if (DD->useAddrOffsetExpressions()) {
auto *Loc = new (DIEValueAllocator) DIEBlock();
addPoolOpAddress(*Loc, Label);
addBlock(Die, Attribute, dwarf::DW_FORM_exprloc, Loc);
} else
addAttribute(Die, Attribute, dwarf::DW_FORM_LLVM_addrx_offset,
new (DIEValueAllocator) DIEAddrOffset(
DD->getAddressPool().getIndex(Base), Label, Base));
}
void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
if (Label)
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
}
unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// If we print assembly, we can't separate .file entries according to
// compile units. Thus all files will belong to the default compile unit.
// FIXME: add a better feature test than hasRawTextSupport. Even better,
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
CUID);
return Asm->OutStreamer->emitDwarfFileDirective(
0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
File->getSource(), CUID);
}
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
// Check for pre-existence.
if (DIE *Die = getDIE(GV))
return Die;
assert(GV);
auto *GVContext = GV->getScope();
const DIType *GTy = GV->getType();
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
: getOrCreateContextDIE(GVContext);
// Add to map.
DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
DIScope *DeclContext;
if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
DeclContext = SDMDecl->getScope();
assert(SDMDecl->isStaticMember() && "Expected static member decl");
assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
// If the global variable's type is different from the one in the class
// member type, assume that it's more specific and also emit it.
if (GTy != SDMDecl->getBaseType())
addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
// Add name and type.
addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
if (GTy)
addType(*VariableDIE, GTy);
// Add scoping info.
if (!GV->isLocalToUnit())
addFlag(*VariableDIE, dwarf::DW_AT_external);
// Add line number info.
addSourceLine(*VariableDIE, GV);
}
if (!GV->isDefinition())
addFlag(*VariableDIE, dwarf::DW_AT_declaration);
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
if (uint32_t AlignInBytes = GV->getAlignInBytes())
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
if (MDTuple *TP = GV->getTemplateParams())
addTemplateParams(*VariableDIE, DINodeArray(TP));
// Add location.
addLocationAttribute(VariableDIE, GV, GlobalExprs);
return VariableDIE;
}
void DwarfCompileUnit::addLocationAttribute(
DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
Optional<unsigned> NVPTXAddressSpace;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
const DIExpression *Expr = GE.Expr;
// For compatibility with DWARF 3 and earlier,
// DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) or
// DW_AT_location(DW_OP_consts, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
addToAccelTable = true;
addConstantValue(
*VariableDIE,
DIExpression::SignedOrUnsignedConstant::UnsignedConstant ==
*Expr->isConstant(),
Expr->getElement(1));
break;
}
// We cannot describe the location of dllimport'd variables: the
// computation of their address requires loads from the IAT.
if (Global && Global->hasDLLImportStorageClass())
continue;
// Nothing to describe without address or constant.
if (!Global && (!Expr || !Expr->isConstant()))
continue;
if (Global && Global->isThreadLocal() &&
!Asm->getObjFileLowering().supportDebugThreadLocalLocation())
continue;
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
if (Expr) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret address space of the variable address.
// Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
// sequence for the NVPTX + gdb target.
unsigned LocalNVPTXAddressSpace;
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
Expr = NewExpr;
NVPTXAddressSpace = LocalNVPTXAddressSpace;
}
}
DwarfExpr->addFragmentOffset(Expr);
}
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
unsigned PointerSize = Asm->getDataLayout().getPointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
"Add support for other sizes if necessary");
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a const<N>u of the appropriate pointer size
addUInt(*Loc, dwarf::DW_FORM_data1,
PointerSize == 4 ? dwarf::DW_OP_const4u
: dwarf::DW_OP_const8u);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
addExpr(*Loc,
PointerSize == 4 ? dwarf::DW_FORM_data4
: dwarf::DW_FORM_data8,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
addUInt(*Loc, dwarf::DW_FORM_udata,
DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
// 3) followed by an OP to make the debugger do a TLS lookup.
addUInt(*Loc, dwarf::DW_FORM_data1,
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
}
// Global variables attached to symbols are memory locations.
// It would be better if this were unconditional, but malformed input that
// mixes non-fragments and fragments for the same variable is too expensive
// to detect in the verifier.
if (DwarfExpr->isUnknownLocation())
DwarfExpr->setMemoryLocationKind();
DwarfExpr->addExpression(Expr);
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret address space of the variable address.
const unsigned NVPTX_ADDR_global_space = 5;
addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
}
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
if (DD->useAllLinkageNames())
addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
DD->addAccelName(*CUNode, GV->getName(), *VariableDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
DD->useAllLinkageNames())
DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
}
DIE *DwarfCompileUnit::getOrCreateCommonBlock(
const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
if (DIE *NDie = getDIE(CB))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
addString(NDie, dwarf::DW_AT_name, Name);
addGlobalName(Name, NDie, CB->getScope());
if (CB->getFile())
addSourceLine(NDie, CB->getLineNo(), CB->getFile());
if (DIGlobalVariable *V = CB->getDecl())
getCU().addLocationAttribute(&NDie, V, GlobalExprs);
return &NDie;
}
void DwarfCompileUnit::addRange(RangeSpan Range) {
DD->insertSectionLabel(Range.Begin);
bool SameAsPrevCU = this == DD->getPrevCU();
DD->setPrevCU(this);
// If we have no current ranges, just add the range and return. Otherwise,
// check the current section and CU against the previous section and CU we
// emitted into (and that the subprogram was contained within). If these are
// the same, extend our current range; otherwise add this as a new range.
if (CURanges.empty() || !SameAsPrevCU ||
(&CURanges.back().End->getSection() !=
&Range.End->getSection())) {
CURanges.push_back(Range);
return;
}
CURanges.back().End = Range.End;
}
void DwarfCompileUnit::initStmtList() {
if (CUNode->isDebugDirectivesOnly())
return;
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (DD->useSectionsAsReferences()) {
LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
} else {
LineTableStartSym =
Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
}
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section. For split dwarf this is
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(D, dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
const MCSymbol *End) {
assert(Begin && "Begin label should not be null!");
assert(End && "End label should not be null!");
assert(Begin->isDefined() && "Invalid starting label");
assert(End->isDefined() && "Invalid end label");
addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
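// DWARF v4 and later allow DW_AT_high_pc to be encoded as an offset from
// DW_AT_low_pc; earlier versions require an address.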
if (DD->getDwarfVersion() < 4)
addLabelAddress(D, dwarf::DW_AT_high_pc, End);
else
addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
}
// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
SmallVector<RangeSpan, 2> BB_List;
// If basic block sections are on, ranges for each basic block section have
// to be emitted separately.
for (const auto &R : Asm->MBBSectionRanges)
BB_List.push_back({R.second.BeginLabel, R.second.EndLabel});
attachRangesOrLowHighPC(*SPDie, BB_List);
if (DD->useAppleExtensionAttributes() &&
!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
// Only include DW_AT_frame_base in full debug info
if (!includeMinimalInlineScopes()) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
TargetFrameLowering::DwarfFrameBase FrameBase =
TFI->getDwarfFrameBase(*Asm->MF);
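// The frame base can be reported by the target as a physical register, as
// the call frame address (CFA), or as a WebAssembly frame base; each kind
// is encoded differently below.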
switch (FrameBase.Kind) {
case TargetFrameLowering::DwarfFrameBase::Register: {
if (Register::isPhysicalRegister(FrameBase.Location.Reg)) {
MachineLocation Location(FrameBase.Location.Reg);
addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
}
break;
}
case TargetFrameLowering::DwarfFrameBase::CFA: {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
break;
}
case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
// These need to be relocatable.
assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
auto SPSym = cast<MCSymbolWasm>(
Asm->GetExternalSymbolSymbol("__stack_pointer"));
// FIXME: this repeats what WebAssemblyMCInstLower::
// GetExternalSymbolSymbol does, since if there's no code that
// refers to this symbol, we have to set it here.
SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
SPSym->setGlobalType(wasm::WasmGlobalType{
uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
Triple::wasm64
? wasm::WASM_TYPE_I64
: wasm::WASM_TYPE_I32),
true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
if (!isDwoUnit()) {
addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
DD->addArangeLabel(SymbolCU(this, SPSym));
} else {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
// symbols are (with entries in .debug_addr).
// For now, since we only ever use index 0, this should work as-is.
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
}
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
} else {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DIExpressionCursor Cursor({});
DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind,
FrameBase.Location.WasmLoc.Index);
DwarfExpr.addExpression(std::move(Cursor));
addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize());
}
break;
}
}
}
// Add name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
DD->addSubprogramNames(*CUNode, SP, *SPDie);
return *SPDie;
}
// Construct a DIE for this scope.
void DwarfCompileUnit::constructScopeDIE(
LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
if (!Scope || !Scope->getScopeNode())
return;
auto *DS = Scope->getScopeNode();
assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
"Only handle inlined subprograms here, use "
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
SmallVector<DIE *, 8> Children;
// We try to create the scope DIE first, then the children DIEs. This
// avoids creating unused children and then removing them later when we find
// out the scope DIE is null.
DIE *ScopeDIE;
if (Scope->getParent() && isa<DISubprogram>(DS)) {
ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
// We create children when the scope DIE is not null.
createScopeChildrenDIE(Scope, Children);
} else {
// Early exit when we know the scope DIE is going to be null.
if (DD->isLexicalScopeDIENull(Scope))
return;
bool HasNonScopeChildren = false;
// We create children here when we know the scope DIE is not going to be
// null and the children will be added to the scope DIE.
createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
// If there are only other scopes as children, put them directly in the
// parent instead, as this scope would serve no purpose.
if (!HasNonScopeChildren) {
FinalChildren.insert(FinalChildren.end(),
std::make_move_iterator(Children.begin()),
std::make_move_iterator(Children.end()));
return;
}
ScopeDIE = constructLexicalScopeDIE(Scope);
assert(ScopeDIE && "Scope DIE should not be null.");
}
// Add children
for (auto &I : Children)
ScopeDIE->addChild(std::move(I));
FinalChildren.push_back(std::move(ScopeDIE));
}
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
HasRangeLists = true;
// Add the range list to the set of ranges to be emitted.
auto IndexAndList =
(DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU)
->addRange(*(Skeleton ? Skeleton : this), std::move(Range));
uint32_t Index = IndexAndList.first;
auto &List = *IndexAndList.second;
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
// FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under
// fission until we support the forms using the .debug_addr section
// (DW_RLE_startx_endx etc.).
if (DD->getDwarfVersion() >= 5)
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
else {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
const MCSymbol *RangeSectionSym =
TLOF.getDwarfRangesSection()->getBeginSymbol();
if (isDwoUnit())
addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
else
addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
}
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
assert(!Ranges.empty());
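// When the ranges section is not in use, or there is only a single range
// and ranges are not explicitly preferred, describe the extent with
// DW_AT_low_pc/DW_AT_high_pc; otherwise emit a range list.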
if (!DD->useRangesSection() ||
(Ranges.size() == 1 &&
(!DD->alwaysUseRanges() ||
DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
attachLowHighPC(Die, Front.Begin, Back.End);
} else
addScopeRangeList(Die, std::move(Ranges));
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
SmallVector<RangeSpan, 2> List;
List.reserve(Ranges.size());
for (const InsnRange &R : Ranges) {
auto *BeginLabel = DD->getLabelBeforeInsn(R.first);
auto *EndLabel = DD->getLabelAfterInsn(R.second);
const auto *BeginMBB = R.first->getParent();
const auto *EndMBB = R.second->getParent();
const auto *MBB = BeginMBB;
// Basic block sections allow basic block subsets to be placed in unique
// sections. For each section, the begin and end label must be added to the
// list. If there is more than one range, debug ranges must be used.
// Otherwise, low/high PC can be used.
// FIXME: Debug Info Emission depends on block order and this assumes that
// the order of blocks will be frozen beyond this point.
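// For example, a range that starts in section A and ends in section B yields
// {BeginLabel, A's end label}, one whole-section span for every section in
// between, and {B's begin label, EndLabel}.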
do {
if (MBB->sameSection(EndMBB) || MBB->isEndSection()) {
auto MBBSectionRange = Asm->MBBSectionRanges[MBB->getSectionIDNum()];
List.push_back(
{MBB->sameSection(BeginMBB) ? BeginLabel
: MBBSectionRange.BeginLabel,
MBB->sameSection(EndMBB) ? EndLabel : MBBSectionRange.EndLabel});
}
if (MBB->sameSection(EndMBB))
break;
MBB = MBB->getNextNode();
} while (true);
}
attachRangesOrLowHighPC(Die, std::move(List));
}
// This scope represents inlined body of a function. Construct DIE to
// represent this concrete inlined copy of the function.
DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
assert(Scope->getScopeNode());
auto *DS = Scope->getScopeNode();
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
// Add the call site information to the DIE.
const DILocation *IA = Scope->getInlinedAt();
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFile()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
if (IA->getColumn())
addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
// Add the name to the name table; we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subroutine nodes.
DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE);
return ScopeDIE;
}
// Construct new DW_TAG_lexical_block for this scope and attach
// DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
if (DD->isLexicalScopeDIENull(Scope))
return nullptr;
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
return ScopeDIE;
}
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
auto D = constructVariableDIEImpl(DV, Abstract);
DV.setDIE(*D);
return D;
}
DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL,
const LexicalScope &Scope) {
auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag());
insertDIE(DL.getLabel(), LabelDie);
DL.setDIE(*LabelDie);
if (Scope.isAbstractScope())
applyLabelAttributes(DL, *LabelDie);
return LabelDie;
}
DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
// Define variable debug information entry.
auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
insertDIE(DV.getVariable(), VariableDie);
if (Abstract) {
applyVariableAttributes(DV, *VariableDie);
return VariableDie;
}
// Add variable address.
unsigned Index = DV.getDebugLocListIndex();
if (Index != ~0U) {
addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
auto TagOffset = DV.getDebugLocListTagOffset();
if (TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*TagOffset);
return VariableDie;
}
// Check if variable has a single location description.
if (auto *DVal = DV.getValueLoc()) {
if (!DVal->isVariadic()) {
const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
if (Entry->isLocation()) {
addVariableAddress(DV, *VariableDie, Entry->getLoc());
} else if (Entry->isInt()) {
auto *Expr = DV.getSingleExpression();
if (Expr && Expr->getNumElements()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
DwarfExpr.addUnsignedConstant(Entry->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
} else
addConstantValue(*VariableDie, Entry->getInt(), DV.getType());
} else if (Entry->isConstantFP()) {
addConstantFPValue(*VariableDie, Entry->getConstantFP());
} else if (Entry->isConstantInt()) {
addConstantValue(*VariableDie, Entry->getConstantInt(), DV.getType());
} else if (Entry->isTargetIndexLocation()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIBasicType *BT = dyn_cast<DIBasicType>(
static_cast<const Metadata *>(DV.getVariable()->getType()));
DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
}
return VariableDie;
}
// If any of the location entries are registers with the value 0, then the
// location is undefined.
if (any_of(DVal->getLocEntries(), [](const DbgValueLocEntry &Entry) {
return Entry.isLocation() && !Entry.getLoc().getReg();
}))
return VariableDie;
const DIExpression *Expr = DV.getSingleExpression();
assert(Expr && "Variadic Debug Value must have an Expression.");
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DwarfExpr.addFragmentOffset(Expr);
DIExpressionCursor Cursor(Expr);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
auto AddEntry = [&](const DbgValueLocEntry &Entry,
DIExpressionCursor &Cursor) {
if (Entry.isLocation()) {
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor,
Entry.getLoc().getReg()))
return false;
} else if (Entry.isInt()) {
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addUnsignedConstant(Entry.getInt());
} else if (Entry.isConstantFP()) {
APInt RawBytes = Entry.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
} else if (Entry.isConstantInt()) {
APInt RawBytes = Entry.getConstantInt()->getValue();
DwarfExpr.addUnsignedConstant(RawBytes);
} else if (Entry.isTargetIndexLocation()) {
TargetIndexLocation Loc = Entry.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the
// WebAssembly-specific encoding is supported.
assert(Asm->TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
} else {
llvm_unreachable("Unsupported Entry type.");
}
return true;
};
DwarfExpr.addExpression(
std::move(Cursor),
[&](unsigned Idx, DIExpressionCursor &Cursor) -> bool {
return AddEntry(DVal->getLocEntries()[Idx], Cursor);
});
// Now attach the location information to the DIE.
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
return VariableDie;
}
// .. else use frame index.
if (!DV.hasFrameIndexExprs())
return VariableDie;
Optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
StackOffset Offset =
TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
auto *TRI = Asm->MF->getSubtarget().getRegisterInfo();
SmallVector<uint64_t, 8> Ops;
TRI->getOffsetOpcodes(Offset, Ops);
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret the address space of the variable address.
// Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
// sequence for the NVPTX + gdb target.
unsigned LocalNVPTXAddressSpace;
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
const DIExpression *NewExpr =
DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
if (NewExpr != Expr) {
Expr = NewExpr;
NVPTXAddressSpace = LocalNVPTXAddressSpace;
}
}
if (Expr)
Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
addOpAddress(*Loc, FrameSymbol);
else
DwarfExpr.addMachineRegExpression(
*Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
// correctly interpret the address space of the variable address.
const unsigned NVPTX_ADDR_local_space = 6;
addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
return VariableDie;
}
DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
const LexicalScope &Scope,
DIE *&ObjectPointer) {
auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
if (DV.isObjectPointer())
ObjectPointer = Var;
return Var;
}
/// Return all DIVariables that appear in the count, bound, stride, or
/// data-location expressions of the given variable's array type.
static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
SmallVector<const DIVariable *, 2> Result;
auto *Array = dyn_cast<DICompositeType>(Var->getType());
if (!Array || Array->getTag() != dwarf::DW_TAG_array_type)
return Result;
if (auto *DLVar = Array->getDataLocation())
Result.push_back(DLVar);
if (auto *AsVar = Array->getAssociated())
Result.push_back(AsVar);
if (auto *AlVar = Array->getAllocated())
Result.push_back(AlVar);
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
if (auto *Dependency = Count.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto LB = Subrange->getLowerBound())
if (auto *Dependency = LB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto UB = Subrange->getUpperBound())
if (auto *Dependency = UB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto ST = Subrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
} else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
if (auto Count = GenericSubrange->getCount())
if (auto *Dependency = Count.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto LB = GenericSubrange->getLowerBound())
if (auto *Dependency = LB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto UB = GenericSubrange->getUpperBound())
if (auto *Dependency = UB.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
if (auto ST = GenericSubrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
}
}
return Result;
}
/// Sort local variables so that variables appearing inside of helper
/// expressions come first.
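/// For example, if a variable-length array's DISubrange count refers to a
/// local variable holding the element count, that variable is ordered before
/// the array so its DIE is available when the array type is constructed.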
static SmallVector<DbgVariable *, 8>
sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
SmallVector<DbgVariable *, 8> Result;
SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList;
// Map back from a DIVariable to its containing DbgVariable.
SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar;
// Set of DbgVariables in Result.
SmallDenseSet<DbgVariable *, 8> Visited;
// For cycle detection.
SmallDenseSet<DbgVariable *, 8> Visiting;
// Initialize the worklist and the DIVariable lookup table.
for (auto Var : reverse(Input)) {
DbgVar.insert({Var->getVariable(), Var});
WorkList.push_back({Var, 0});
}
// Perform a stable topological sort by doing a DFS.
while (!WorkList.empty()) {
auto Item = WorkList.back();
DbgVariable *Var = Item.getPointer();
bool visitedAllDependencies = Item.getInt();
WorkList.pop_back();
// Dependency is in a different lexical scope or a global.
if (!Var)
continue;
// Already handled.
if (Visited.count(Var))
continue;
// Add to Result if all dependencies are visited.
if (visitedAllDependencies) {
Visited.insert(Var);
Result.push_back(Var);
continue;
}
// Detect cycles.
auto Res = Visiting.insert(Var);
if (!Res.second) {
assert(false && "dependency cycle in local variables");
return Result;
}
// Push dependencies and this node onto the worklist, so that this node is
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
for (auto *Dependency : dependencies(Var)) {
auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
WorkList.push_back({DbgVar[Dep], 0});
}
}
return Result;
}
DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren) {
assert(Children.empty());
DIE *ObjectPointer = nullptr;
// Emit function arguments (order is significant).
auto Vars = DU->getScopeVariables().lookup(Scope);
for (auto &DV : Vars.Args)
Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
// Emit local variables.
auto Locals = sortLocalVars(Vars.Locals);
for (DbgVariable *DV : Locals)
Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
// Skip imported directives in gmlt-like data.
if (!includeMinimalInlineScopes()) {
// There is no need to emit an empty lexical block DIE.
for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
Children.push_back(
constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
}
if (HasNonScopeChildren)
*HasNonScopeChildren = !Children.empty();
for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
Children.push_back(constructLabelDIE(*DL, *Scope));
for (LexicalScope *LS : Scope->getChildren())
constructScopeDIE(LS, Children);
return ObjectPointer;
}
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
if (Scope) {
assert(!Scope->getInlinedAt());
assert(!Scope->isAbstractScope());
// Collect lexical scope children first.
// ObjectPointer might be a local (non-argument) variable if it's a
// block's synthetic this pointer.
if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
// If this is a variadic function, add an unspecified parameter.
DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
// If we have a single element of null, it is a function that returns void.
// If we have more than one element and the last one is null, it is a
// variadic function.
if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
!includeMinimalInlineScopes())
ScopeDIE.addChild(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
return ScopeDIE;
}
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
DIE &ScopeDIE) {
// We create children when the scope DIE is not null.
SmallVector<DIE *, 8> Children;
DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
// Add children
for (auto &I : Children)
ScopeDIE.addChild(std::move(I));
return ObjectPointer;
}
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
DIE *ContextDIE;
DwarfCompileUnit *ContextCU = this;
if (includeMinimalInlineScopes())
ContextDIE = &getUnitDie();
// Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
// the important distinction that the debug node is not associated with the
// DIE (since the debug node will be associated with the concrete DIE, if
// any). It could be refactored to some common utility function.
else if (auto *SPDecl = SP->getDeclaration()) {
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else {
ContextDIE = getOrCreateContextDIE(SP->getScope());
// The scope may be shared with a subprogram that has already been
// constructed in another CU, in which case we need to construct this
// subprogram in the same CU.
ContextCU = DD->lookupCU(ContextDIE->getUnitDie());
}
// Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
if (!ContextCU->includeMinimalInlineScopes())
ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
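// When emitting DWARF 4 and not tuning for LLDB, the standardized DWARF 5
// call-site constructs used below are mapped onto the pre-standard GNU
// extensions (DW_TAG_GNU_call_site, DW_AT_GNU_call_site_value, etc.) that
// those consumers understand.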
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB();
}
dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
if (!useGNUAnalogForDwarf5Feature())
return Tag;
switch (Tag) {
case dwarf::DW_TAG_call_site:
return dwarf::DW_TAG_GNU_call_site;
case dwarf::DW_TAG_call_site_parameter:
return dwarf::DW_TAG_GNU_call_site_parameter;
default:
llvm_unreachable("DWARF5 tag with no GNU analog");
}
}
dwarf::Attribute
DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
if (!useGNUAnalogForDwarf5Feature())
return Attr;
switch (Attr) {
case dwarf::DW_AT_call_all_calls:
return dwarf::DW_AT_GNU_all_call_sites;
case dwarf::DW_AT_call_target:
return dwarf::DW_AT_GNU_call_site_target;
case dwarf::DW_AT_call_origin:
return dwarf::DW_AT_abstract_origin;
case dwarf::DW_AT_call_return_pc:
return dwarf::DW_AT_low_pc;
case dwarf::DW_AT_call_value:
return dwarf::DW_AT_GNU_call_site_value;
case dwarf::DW_AT_call_tail_call:
return dwarf::DW_AT_GNU_tail_call;
default:
llvm_unreachable("DWARF5 attribute with no GNU analog");
}
}
dwarf::LocationAtom
DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
if (!useGNUAnalogForDwarf5Feature())
return Loc;
switch (Loc) {
case dwarf::DW_OP_entry_value:
return dwarf::DW_OP_GNU_entry_value;
default:
llvm_unreachable("DWARF5 location atom with no GNU analog");
}
}
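// Illustrative shape of the entry built below for a direct, non-tail call in
// DWARF 5 mode (forms and GNU analogs elided):
//   DW_TAG_call_site
//     DW_AT_call_origin    (reference to the callee's DW_TAG_subprogram)
//     DW_AT_call_return_pc (address of the instruction after the call)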
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- DIE *CalleeDIE,
+ const DISubprogram *CalleeSP,
bool IsTail,
const MCSymbol *PCAddr,
const MCSymbol *CallAddr,
unsigned CallReg) {
// Insert a call site entry DIE within ScopeDIE.
DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
ScopeDIE, nullptr);
if (CallReg) {
// Indirect call.
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- assert(CalleeDIE && "No DIE for call site entry origin");
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
if (IsTail) {
// Attach DW_AT_call_tail_call to tail calls for standards compliance.
addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
// Attach the address of the branch instruction to allow the debugger to
// show where the tail call occurred. This attribute has no GNU analog.
//
// GDB works backwards from non-standard usage of DW_AT_low_pc (in DWARF4
// mode -- equivalently, in DWARF5 mode, DW_AT_call_return_pc) at tail-call
// site entries to figure out the PC of tail-calling branch instructions.
// This means it doesn't need the compiler to emit DW_AT_call_pc, so we
// don't emit it here.
//
// There's no need to tie non-GDB debuggers to this non-standardness, as it
// adds unnecessary complexity to the debugger. For non-GDB debuggers, emit
// the standard DW_AT_call_pc info.
if (!useGNUAnalogForDwarf5Feature())
addLabelAddress(CallSiteDIE, dwarf::DW_AT_call_pc, CallAddr);
}
// Attach the return PC to allow the debugger to disambiguate call paths
// from one function to another.
//
// The return PC is only really needed when the call /isn't/ a tail call, but
// GDB expects it in DWARF4 mode, even for tail calls (see the comment above
// the DW_AT_call_pc emission logic for an explanation).
if (!IsTail || useGNUAnalogForDwarf5Feature()) {
assert(PCAddr && "Missing return PC information for a call");
addLabelAddress(CallSiteDIE,
getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr);
}
return CallSiteDIE;
}
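// Each parameter entry built below has, roughly, the shape:
//   DW_TAG_call_site_parameter
//     DW_AT_location   (the register the parameter is passed in)
//     DW_AT_call_value (a DWARF expression computing the value at the call)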
void DwarfCompileUnit::constructCallSiteParmEntryDIEs(
DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) {
for (const auto &Param : Params) {
unsigned Register = Param.getRegister();
auto CallSiteDieParam =
DIE::get(DIEValueAllocator,
getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter));
insertDIE(CallSiteDieParam);
addAddress(*CallSiteDieParam, dwarf::DW_AT_location,
MachineLocation(Register));
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
DwarfExpr.setCallSiteParamValueFlag();
DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr);
addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value),
DwarfExpr.finalize());
CallSiteDIE.addChild(CallSiteDieParam);
}
}
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie);
DIE *EntityDie;
auto *Entity = Module->getEntity();
if (auto *NS = dyn_cast<DINamespace>(Entity))
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
EntityDie = getOrCreateModule(M);
else if (auto *SP = dyn_cast<DISubprogram>(Entity))
EntityDie = getOrCreateSubprogramDIE(SP);
else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
EntityDie = getOrCreateGlobalVariableDIE(GV, {});
else
EntityDie = getDIE(Entity);
assert(EntityDie);
addSourceLine(*IMDie, Module->getLine(), Module->getFile());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
return IMDie;
}
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
} else {
assert(D || includeMinimalInlineScopes());
if (D)
// And attach the attributes
applySubprogramAttributesToDefinition(SP, *D);
}
}
void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) {
DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity());
auto *Die = Entity->getDIE();
/// Label may be used to generate DW_AT_low_pc, so put it outside
/// the if/else block.
const DbgLabel *Label = nullptr;
if (AbsEntity && AbsEntity->getDIE()) {
addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE());
Label = dyn_cast<const DbgLabel>(Entity);
} else {
if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity))
applyVariableAttributes(*Var, *Die);
else if ((Label = dyn_cast<const DbgLabel>(Entity)))
applyLabelAttributes(*Label, *Die);
else
llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel.");
}
if (Label)
if (const auto *Sym = Label->getSymbol())
addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym);
}
DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) {
auto &AbstractEntities = getAbstractEntities();
auto I = AbstractEntities.find(Node);
if (I != AbstractEntities.end())
return I->second.get();
return nullptr;
}
void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
LexicalScope *Scope) {
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
Entity = std::make_unique<DbgVariable>(
cast<const DILocalVariable>(Node), nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
Entity = std::make_unique<DbgLabel>(
cast<const DILabel>(Node), nullptr /* IA */);
DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
}
}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
if (!Skeleton && !DD->useSectionsAsReferences()) {
LabelBegin = Asm->createTempSymbol("cu_begin");
Asm->OutStreamer->emitLabel(LabelBegin);
}
dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
: DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
: dwarf::DW_UT_compile;
DwarfUnit::emitCommonHeader(UseOffsets, UT);
if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile)
Asm->emitInt64(getDWOId());
}
bool DwarfCompileUnit::hasDwarfPubSections() const {
switch (CUNode->getNameTableKind()) {
case DICompileUnit::DebugNameTableKind::None:
return false;
// Opting in to GNU Pubnames/types overrides the default to ensure these are
// generated for things like Gold's gdb_index generation.
case DICompileUnit::DebugNameTableKind::GNU:
return true;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
!CUNode->isDebugDirectivesOnly() &&
DD->getAccelTableKind() != AccelTableKind::Apple &&
DD->getDwarfVersion() < 5;
}
llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
/// addGlobalName - Add a new global name to the compile unit.
void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
GlobalNames[FullName] = &Die;
}
void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Name.str();
// Insert, allowing the entry to remain as-is if it's already present.
// This way the CU-level type DIE is preferred over the "can't describe this
// type as a unit offset because it's not really in the CU at all, it's only
// in a type unit" case.
GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
}
void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
const DIScope *Context) {
if (!hasDwarfPubSections())
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
// Insert, allowing the entry to remain as-is if it's already present.
// This way the CU-level type DIE is preferred over the "can't describe this
// type as a unit offset because it's not really in the CU at all, it's only
// in a type unit" case.
GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
else
addAddress(Die, dwarf::DW_AT_location, Location);
}
/// Add an address attribute to a die based on the location provided.
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor({});
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
}
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable given the extra
/// address information encoded in the DbgVariable. Add the DWARF information
/// to the die.
void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *DIExpr = DV.getSingleExpression();
DwarfExpr.addFragmentOffset(DIExpr);
DwarfExpr.setLocation(Location, DIExpr);
DIExpressionCursor Cursor(DIExpr);
if (DIExpr->isEntryValue())
DwarfExpr.beginEntryValueExpression(Cursor);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
if (DwarfExpr.TagOffset)
addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
*DwarfExpr.TagOffset);
}
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
dwarf::Form Form = (DD->getDwarfVersion() >= 5)
? dwarf::DW_FORM_loclistx
: DD->getDwarfSectionOffsetForm();
addAttribute(Die, Attribute, Form, DIELocList(Index));
}
void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
DIE &VariableDie) {
StringRef Name = Var.getName();
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
const auto *DIVar = Var.getVariable();
if (DIVar)
if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
if (Var.isArtificial())
addFlag(VariableDie, dwarf::DW_AT_artificial);
}
void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label,
DIE &LabelDie) {
StringRef Name = Label.getName();
if (!Name.empty())
addString(LabelDie, dwarf::DW_AT_name, Name);
const auto *DILabel = Label.getLabel();
addSourceLine(LabelDie, DILabel);
}
/// Add a Dwarf expression attribute data and value.
void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
const MCExpr *Expr) {
addAttribute(Die, (dwarf::Attribute)0, Form, DIEExpr(Expr));
}
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
addGlobalName(SP->getName(), SPDie, Context);
}
bool DwarfCompileUnit::isDwoUnit() const {
return DD->useSplitDwarf() && Skeleton;
}
void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
constructTypeDIE(D, CTy);
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
void DwarfCompileUnit::addAddrTableBase() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Label = DD->getAddressPool().getLabel();
addSectionLabel(getUnitDie(),
DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
: dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
addAttribute(Die, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
}
void DwarfCompileUnit::createBaseTypeDIEs() {
// Insert the base_type DIEs directly after the CU so that their offsets will
// fit in the fixed-size ULEB128 used inside the location expressions.
// Maintain order by iterating backwards and inserting to the front of the CU
// child list.
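// For example, a 32-bit signed base type added here is named
// "DW_ATE_signed_32".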
for (auto &Btr : reverse(ExprRefedBaseTypes)) {
DIE &Die = getUnitDie().addChildFront(
DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
SmallString<32> Str;
addString(Die, dwarf::DW_AT_name,
Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
"_" + Twine(Btr.BitSize)).toStringRef(Str));
addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
Btr.Die = &Die;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6d8186a5ee2b..6e9261087686 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,372 +1,370 @@
//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
namespace llvm {
class AsmPrinter;
class DIE;
class DIELoc;
class DIEValueList;
class DwarfFile;
class GlobalVariable;
class MCExpr;
class MCSymbol;
class MDNode;
enum class UnitKind { Skeleton, Full };
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
bool HasRangeLists = false;
/// The start of the unit line section; this is also
/// reused in applyStmtList.
MCSymbol *LineTableStartSym;
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton = nullptr;
/// The start of the unit within its section.
MCSymbol *LabelBegin = nullptr;
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
using ImportedEntityList = SmallVector<const MDNode *, 8>;
using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
ImportedEntityMap ImportedEntities;
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
// List of ranges for a given compile unit.
SmallVector<RangeSpan, 2> CURanges;
// The base address of this unit, if any. Used for relative references in
// ranges/locs.
const MCSymbol *BaseAddress = nullptr;
DenseMap<const MDNode *, DIE *> AbstractSPDies;
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
uint64_t DWOId = 0;
/// Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
bool isDwoUnit() const override;
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return AbstractSPDies;
return DU->getAbstractSPDies();
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return AbstractEntities;
return DU->getAbstractEntities();
}
void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
UnitKind Kind = UnitKind::Full);
bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
}
bool includeMinimalInlineScopes() const;
void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
/// Get line table start symbol for this unit.
MCSymbol *getLineTableStartSym() const { return LineTableStartSym; }
/// A pair of GlobalVariable and DIExpression.
struct GlobalExpr {
const GlobalVariable *Var;
const DIExpression *Expr;
};
struct BaseTypeRef {
BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
BitSize(BitSize), Encoding(Encoding) {}
unsigned BitSize;
dwarf::TypeKind Encoding;
DIE *Die = nullptr;
};
std::vector<BaseTypeRef> ExprRefedBaseTypes;
/// Get or create global variable DIE.
DIE *
getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
ArrayRef<GlobalExpr> GlobalExprs);
void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
/// addLocalLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr only.
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(const DIFile *File) override;
void addImportedEntity(const DIImportedEntity* IE) {
DIScope *Scope = IE->getScope();
assert(Scope && "Invalid Scope encoding!");
if (!isa<DILocalScope>(Scope))
// No need to add imported entities that are not local declarations.
return;
auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
ImportedEntities[LocalScope].push_back(IE);
}
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
/// Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
void constructScopeDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &FinalChildren);
/// A helper function to construct a RangeSpanList for a given
/// lexical scope.
void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
void attachRangesOrLowHighPC(DIE &D,
const SmallVectorImpl<InsnRange> &Ranges);
/// This scope represents inlined body of a function. Construct
/// DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(LexicalScope *Scope);
/// Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(LexicalScope *Scope);
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
DIE *&ObjectPointer);
/// Construct a DIE for the given DbgLabel.
DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
/// A helper function to create children of a Scope DIE.
DIE *createScopeChildrenDIE(LexicalScope *Scope,
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
void createBaseTypeDIEs();
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location
/// atom. Only applicable when emitting otherwise DWARF4-compliant debug info.
bool useGNUAnalogForDwarf5Feature() const;
/// This takes a DWARF 5 tag and returns it or a GNU analog.
dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
/// This takes a DWARF 5 attribute and returns it or a GNU analog.
dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const;
/// This takes a DWARF 5 location atom and either returns it or a GNU analog.
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeDIE.
- /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
- /// For indirect calls \p CalleeDIE is set to nullptr.
+ /// callee described by \p CalleeSP.
/// \p IsTail specifies whether the call is a tail call.
/// \p PCAddr points to the PC value after the call instruction.
/// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
- const MCSymbol *PCAddr,
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
const MCSymbol *CallAddr, unsigned CallReg);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
/// were collected by the \ref collectCallSiteParameters.
/// Note: The order of parameters does not matter, since debuggers recognize
/// call site parameters by the DW_AT_location attribute.
void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
SmallVector<DbgCallSiteParam, 4> &Params);
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
void finishEntityDefinition(const DbgEntity *Entity);
/// Find abstract variable associated with Var.
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
DbgEntity *getExistingAbstractEntity(const DINode *Node);
void createAbstractEntity(const DINode *Node, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
unsigned getHeaderSize() const override {
// DWARF v5 added the DWO ID to the header for split/skeleton units.
unsigned DWOIdSize =
DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t)
: 0;
return DwarfUnit::getHeaderSize() + DWOIdSize;
}
unsigned getLength() {
return Asm->getUnitLengthFieldByteSize() + // Length field
getHeaderSize() + getUnitDie().getSize();
}
void emitHeader(bool UseOffsets) override;
/// Add the DW_AT_addr_base attribute to the unit DIE.
void addAddrTableBase();
MCSymbol *getLabelBegin() const {
assert(LabelBegin && "LabelBegin is not initialized");
return LabelBegin;
}
MCSymbol *getMacroLabelBegin() const {
return MacroLabelBegin;
}
/// Add a new global name to the compile unit.
void addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) override;
/// Add a new global name present in a type unit to this compile unit.
void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) override;
/// Add a new global type present in a type unit to this compile unit.
void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
/// Add DW_AT_location attribute for a DbgVariable based on provided
/// MachineLocation.
void addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location);
/// Add an address attribute to a die based on the location provided.
void addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location);
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual variable (navigating the
/// extra location information encoded in the type) based on the starting
/// location. Add the DWARF information to the die.
void addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
/// Add a Dwarf loclistptr attribute data and value.
void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
/// Add a Dwarf expression attribute data and value.
void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
void applySubprogramAttributesToDefinition(const DISubprogram *SP,
DIE &SPDie);
void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie);
/// getRanges - Get the list of ranges for this unit.
const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
const MCSymbol *getBaseAddress() const { return BaseAddress; }
uint64_t getDWOId() const { return DWOId; }
void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
bool hasDwarfPubSections() const;
void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
};
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee14423ca3d0..52591a18791f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,3552 +1,3537 @@
//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
STATISTIC(NumCSParams, "Number of dbg call site params created");
static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
"use-dwarf-ranges-base-address-specifier", cl::Hidden,
cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
cl::init(false));
static cl::opt<bool>
GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
static cl::opt<bool> SplitDwarfCrossCuReferences(
"split-dwarf-cross-cu-references", cl::Hidden,
cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
enum DefaultOnOff { Default, Enable, Disable };
static cl::opt<DefaultOnOff> UnknownLocations(
"use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::values(clEnumVal(Default, "At top of block or after label"),
clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
cl::init(Default));
static cl::opt<AccelTableKind> AccelTables(
"accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."),
cl::values(clEnumValN(AccelTableKind::Default, "Default",
"Default for platform"),
clEnumValN(AccelTableKind::None, "Disable", "Disabled."),
clEnumValN(AccelTableKind::Apple, "Apple", "Apple"),
clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")),
cl::init(AccelTableKind::Default));
static cl::opt<DefaultOnOff>
DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden,
cl::desc("Use inlined strings rather than string section."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<bool>
NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden,
cl::desc("Disable emission .debug_ranges section."),
cl::init(false));
static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
"dwarf-sections-as-references", cl::Hidden,
cl::desc("Use sections+offset as references rather than labels."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<bool>
UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden,
cl::desc("Emit the GNU .debug_macro format with DWARF <5"),
cl::init(false));
static cl::opt<DefaultOnOff> DwarfOpConvert(
"dwarf-op-convert", cl::Hidden,
cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
AbstractLinkageNames
};
static cl::opt<LinkageNameOption>
DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
cl::desc("Which DWARF linkage-name attributes to emit."),
cl::values(clEnumValN(DefaultLinkageNames, "Default",
"Default for platform"),
clEnumValN(AllLinkageNames, "All", "All"),
clEnumValN(AbstractLinkageNames, "Abstract",
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
"minimize-addr-in-v5", cl::Hidden,
cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more "
"address pool entry sharing to reduce relocations/object size"),
cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default",
"Default address minimization strategy"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
"Use rnglists for contiguous ranges if that allows "
"using a pre-existing base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Expressions,
"Expressions",
"Use exprloc addrx+offset expressions for any "
"address with a prior base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Form, "Form",
"Use addrx+offset extension form for any address "
"with a prior base address"),
clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
"Stuff")),
cl::init(DwarfDebug::MinimizeAddrInV5::Default));
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
getActiveStreamer().emitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
getActiveStreamer().emitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
getActiveStreamer().emitULEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
getActiveStreamer().emitInt8(Value, Twine(Value));
}
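// Base type references are emitted as ULEB128 values padded to ULEB128PadSize
// bytes, so an index must fit in ULEB128PadSize * 7 == 28 value bits.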
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx won't fit");
getActiveStreamer().emitULEB128(Idx, Twine(Idx), ULEB128PadSize);
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
llvm::Register MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
void DebugLocDwarfExpression::enableTemporaryBuffer() {
assert(!IsBuffering && "Already buffering?");
if (!TmpBuf)
TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments);
IsBuffering = true;
}
void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
unsigned DebugLocDwarfExpression::getTemporaryBufferSize() {
return TmpBuf ? TmpBuf->Bytes.size() : 0;
}
void DebugLocDwarfExpression::commitTemporaryBuffer() {
if (!TmpBuf)
return;
for (auto Byte : enumerate(TmpBuf->Bytes)) {
const char *Comment = (Byte.index() < TmpBuf->Comments.size())
? TmpBuf->Comments[Byte.index()].c_str()
: "";
OutBS.emitInt8(Byte.value(), Comment);
}
TmpBuf->Bytes.clear();
TmpBuf->Comments.clear();
}
const DIType *DbgVariable::getType() const {
return getVariable()->getType();
}
/// Get .debug_loc entry for the instruction range starting at MI.
static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
const bool IsVariadic = MI->isDebugValueList();
assert(MI->getNumOperands() >= 3);
SmallVector<DbgValueLocEntry, 4> DbgValueLocEntries;
for (const MachineOperand &Op : MI->debug_operands()) {
if (Op.isReg()) {
MachineLocation MLoc(Op.getReg(),
MI->isNonListDebugValue() && MI->isDebugOffsetImm());
DbgValueLocEntries.push_back(DbgValueLocEntry(MLoc));
} else if (Op.isTargetIndex()) {
DbgValueLocEntries.push_back(
DbgValueLocEntry(TargetIndexLocation(Op.getIndex(), Op.getOffset())));
} else if (Op.isImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getImm()));
else if (Op.isFPImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getFPImm()));
else if (Op.isCImm())
DbgValueLocEntries.push_back(DbgValueLocEntry(Op.getCImm()));
else
llvm_unreachable("Unexpected debug operand in DBG_VALUE* instruction!");
}
return DbgValueLoc(Expr, DbgValueLocEntries, IsVariadic);
}
void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc.get() && "Already initialized?");
assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
"Wrong inlined-at");
ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
assert(llvm::all_of(FrameIndexExprs,
[](const FrameIndexExpr &A) {
return A.Expr->isFragment();
}) &&
"multiple FI expressions without DW_OP_LLVM_fragment");
llvm::sort(FrameIndexExprs,
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
B.Expr->getFragmentInfo()->OffsetInBits;
});
return FrameIndexExprs;
}
void DbgVariable::addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
assert(V.getVariable() == getVariable() && "conflicting variable");
assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
// FIXME: This logic should not be necessary anymore, as we now have proper
// deduplication. However, without it, we currently run into the assertion
// below, which means that we are likely dealing with broken input, i.e. two
// non-fragment entries for the same variable at different frame indices.
if (FrameIndexExprs.size()) {
auto *Expr = FrameIndexExprs.back().Expr;
if (!Expr || !Expr->isFragment())
return;
}
for (const auto &FIE : V.FrameIndexExprs)
// Ignore duplicate entries.
if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) {
return FIE.FI == Other.FI && FIE.Expr == Other.Expr;
}))
FrameIndexExprs.push_back(FIE);
assert((FrameIndexExprs.size() == 1 ||
llvm::all_of(FrameIndexExprs,
[](FrameIndexExpr &FIE) {
return FIE.Expr && FIE.Expr->isFragment();
})) &&
"conflicting locations for variable");
}
static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
bool GenerateTypeUnits,
DebuggerKind Tuning,
const Triple &TT) {
// Honor an explicit request.
if (AccelTables != AccelTableKind::Default)
return AccelTables;
// Accelerator tables with type units are currently not supported.
if (GenerateTypeUnits)
return AccelTableKind::None;
// Accelerator tables get emitted if targeting DWARF v5 or LLDB. DWARF v5
// always implies debug_names. For lower standard versions we use Apple
// accelerator tables on Apple platforms and debug_names elsewhere.
if (DwarfVersion >= 5)
return AccelTableKind::Dwarf;
if (Tuning == DebuggerKind::LLDB)
return TT.isOSBinFormatMachO() ? AccelTableKind::Apple
: AccelTableKind::Dwarf;
return AccelTableKind::None;
}
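// For reference, the selection above resolves as follows (assuming no
// explicit request and no type units):
//   DWARF v5 or newer                      -> AccelTableKind::Dwarf (.debug_names)
//   older DWARF, LLDB tuning, Mach-O       -> AccelTableKind::Apple
//   older DWARF, LLDB tuning, other format -> AccelTableKind::Dwarf
//   otherwise                              -> AccelTableKind::None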
DwarfDebug::DwarfDebug(AsmPrinter *A)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning". The target option takes
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
else if (TT.isPS4CPU())
DebuggerTuning = DebuggerKind::SCE;
else if (TT.isOSAIX())
DebuggerTuning = DebuggerKind::DBX;
else
DebuggerTuning = DebuggerKind::GDB;
if (DwarfInlinedStrings == Default)
UseInlineStrings = TT.isNVPTX() || tuneForDBX();
else
UseInlineStrings = DwarfInlinedStrings == Enable;
UseLocSection = !TT.isNVPTX();
HasAppleExtensionAttributes = tuneForLLDB();
// Handle split DWARF.
HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
// SCE defaults to linkage names only for abstract subprograms.
if (DwarfLinkageNames == DefaultLinkageNames)
UseAllLinkageNames = !tuneForSCE();
else
UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
// Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2.
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
bool Dwarf64 = DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
TT.isArch64Bit(); // DWARF64 requires 64-bit relocations.
// Support DWARF64
// 1: For ELF when requested.
// 2: For XCOFF64: the AIX assembler will fill in debug section lengths
// according to the DWARF64 format for 64-bit assembly, so we must use
// DWARF64 in the compiler too for 64-bit mode.
Dwarf64 &=
((Asm->TM.Options.MCOptions.Dwarf64 || MMI->getModule()->isDwarf64()) &&
TT.isOSBinFormatELF()) ||
TT.isOSBinFormatXCOFF();
if (!Dwarf64 && TT.isArch64Bit() && TT.isOSBinFormatXCOFF())
report_fatal_error("XCOFF requires DWARF64 for 64-bit mode!");
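// For example: a 64-bit ELF target built with DWARF v4 and an explicit
// DWARF64 request ends up using the 64-bit format, a 64-bit XCOFF target
// must use DWARF64 (enforced by the fatal error above), and 32-bit targets
// always stay on DWARF32.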
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
if (DwarfSectionsAsReferences == Default)
UseSectionsAsReferences = TT.isNVPTX();
else
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
// Don't generate type units for unsupported object file formats.
GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() ||
A->TM.getTargetTriple().isOSBinFormatWasm()) &&
GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
// Work around a GDB bug. GDB doesn't support the standard opcode;
// SCE doesn't support GNU's; LLDB prefers the standard opcode, which
// is defined as of DWARF 3.
// See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
// https://sourceware.org/bugzilla/show_bug.cgi?id=11616
UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
// GDB does not fully support the DWARF 4 representation for bitfields.
UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
// The DWARF v5 string offsets table has - possibly shared - contributions
// from each compile and type unit, each preceded by a header. The string
// offsets table used by the pre-DWARF v5 split-DWARF implementation uses
// a monolithic string offsets table without any header.
UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
// Emit call-site-param debug info for GDB and LLDB, if the target supports
// the debug entry values feature. It can also be enabled explicitly.
EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues();
// It is unclear if the GCC .debug_macro extension is well-specified
// for split DWARF. For now, do not allow LLVM to emit it.
UseDebugMacroSection =
DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());
if (DwarfOpConvert == Default)
EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) || (tuneForLLDB() && !TT.isOSBinFormatMachO()));
else
EnableOpConvert = (DwarfOpConvert == Enable);
// Split DWARF would benefit object size significantly by trading reductions
// in address pool usage for slightly increased range list encodings.
if (DwarfVersion >= 5) {
MinimizeAddr = MinimizeAddrInV5Option;
// FIXME: In the future, enable this by default for Split DWARF where the
// tradeoff is more pronounced due to being able to offload the range
// lists to the dwo file and shrink object files/reduce relocations there.
if (MinimizeAddr == MinimizeAddrInV5::Default)
MinimizeAddr = MinimizeAddrInV5::Disabled;
}
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
: dwarf::DWARF32);
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() = default;
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name))
return false;
return Name.find(") ") != StringRef::npos;
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
StringRef &Category) {
if (!hasObjCCategory(In)) {
Class = In.slice(In.find('[') + 1, In.find(' '));
Category = "";
return;
}
Class = In.slice(In.find('[') + 1, In.find('('));
Category = In.slice(In.find('[') + 1, In.find(' '));
}
static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
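// As an illustration (using a made-up selector), given the subprogram name
// "-[NSString(MyCategory) myMethod:]" the helpers above produce:
//   isObjCClass()          -> true (leading '-')
//   hasObjCCategory()      -> true (contains ") ")
//   getObjCClassCategory() -> Class = "NSString", Category = "NSString(MyCategory)"
//   getObjCMethodName()    -> "myMethod:"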
// Add the various names to the Dwarf accelerator table names.
void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
const DISubprogram *SP, DIE &Die) {
if (getAccelTableKind() != AccelTableKind::Apple &&
CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
return;
if (!SP->isDefinition())
return;
if (SP->getName() != "")
addAccelName(CU, SP->getName(), Die);
// If the linkage name is different from the name, also emit it into the
// name table. Only do that if we are going to actually emit that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
(useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP->getName())) {
StringRef Class, Category;
getObjCClassCategory(SP->getName(), Class, Category);
addAccelObjC(CU, Class, Die);
if (Category != "")
addAccelObjC(CU, Category, Die);
// Also add the base method name to the name table.
addAccelName(CU, getObjCMethodName(SP->getName()), Die);
}
}
/// Check whether we should create a DIE for the given Scope; return true
/// if we don't create a DIE (the corresponding DIE is null).
bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
if (Scope->isAbstractScope())
return false;
// We don't create a DIE if there is no Range.
const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
if (Ranges.empty())
return true;
if (Ranges.size() > 1)
return false;
// We don't create a DIE if we have a single Range and the end label
// is null.
return !getLabelAfterInsn(Ranges.front().second);
}
template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(CU);
if (auto *SkelCU = CU.getSkeleton())
if (CU.getCUNode()->getSplitDebugInlining())
F(*SkelCU);
}
bool DwarfDebug::shareAcrossDWOCUs() const {
return SplitDwarfCrossCuReferences;
}
void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining())
// Avoid building the original CU if it won't be used
SrcCU.constructAbstractSubprogramScopeDIE(Scope);
else {
auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
if (auto *SkelCU = CU.getSkeleton()) {
(shareAcrossDWOCUs() ? CU : SrcCU)
.constructAbstractSubprogramScopeDIE(Scope);
if (CU.getCUNode()->getSplitDebugInlining())
SkelCU->constructAbstractSubprogramScopeDIE(Scope);
} else
CU.constructAbstractSubprogramScopeDIE(Scope);
}
}
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
- DICompileUnit *Unit = SP->getUnit();
- assert(SP->isDefinition() && "Subprogram not a definition");
- assert(Unit && "Subprogram definition without parent unit");
- auto &CU = getOrCreateDwarfCompileUnit(Unit);
- return *CU.getOrCreateSubprogramDIE(SP);
-}
-
/// Represents a parameter whose call site value can be described by applying a
/// debug expression to a register in the forwarded register worklist.
struct FwdRegParamInfo {
/// The described parameter register.
unsigned ParamReg;
/// Debug expression that has been built up when walking through the
/// instruction chain that produces the parameter's value.
const DIExpression *Expr;
};
/// Register worklist for finding call site values.
using FwdRegWorklist = MapVector<unsigned, SmallVector<FwdRegParamInfo, 2>>;
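// In other words, the worklist maps a physical register that forwards a call
// argument to the set of parameter registers whose call site values are (so
// far) described in terms of it, together with the expressions accumulated
// while walking back through the instruction stream.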
/// Append the expression \p Addition to \p Original and return the result.
static const DIExpression *combineDIExpressions(const DIExpression *Original,
const DIExpression *Addition) {
std::vector<uint64_t> Elts = Addition->getElements().vec();
// Avoid multiple DW_OP_stack_values.
if (Original->isImplicit() && Addition->isImplicit())
erase_value(Elts, dwarf::DW_OP_stack_value);
const DIExpression *CombinedExpr =
(Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
return CombinedExpr;
}
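// For example, combining a register-based expression such as
// [DW_OP_plus_uconst 8] with an addition of [DW_OP_deref] simply yields
// [DW_OP_plus_uconst 8, DW_OP_deref]; when both expressions are implicit,
// the duplicate DW_OP_stack_value contributed by the addition is dropped
// before appending.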
/// Emit call site parameter entries that are described by the given value and
/// debug expression.
template <typename ValT>
static void finishCallSiteParams(ValT Val, const DIExpression *Expr,
ArrayRef<FwdRegParamInfo> DescribedParams,
ParamSet &Params) {
for (auto Param : DescribedParams) {
bool ShouldCombineExpressions = Expr && Param.Expr->getNumElements() > 0;
// TODO: Entry value operations currently cannot be combined with any
// other expressions, so we can't emit call site entries in those cases.
if (ShouldCombineExpressions && Expr->isEntryValue())
continue;
// If a parameter's call site value is produced by a chain of
// instructions we may have already created an expression for the
// parameter when walking through the instructions. Append that to the
// base expression.
const DIExpression *CombinedExpr =
ShouldCombineExpressions ? combineDIExpressions(Expr, Param.Expr)
: Expr;
assert((!CombinedExpr || CombinedExpr->isValid()) &&
"Combined debug expression is invalid");
DbgValueLoc DbgLocVal(CombinedExpr, DbgValueLocEntry(Val));
DbgCallSiteParam CSParm(Param.ParamReg, DbgLocVal);
Params.push_back(CSParm);
++NumCSParams;
}
}
/// Add \p Reg to the worklist, if it's not already present, and mark that the
/// given parameter registers' values can (potentially) be described using
/// that register and a debug expression.
static void addToFwdRegWorklist(FwdRegWorklist &Worklist, unsigned Reg,
const DIExpression *Expr,
ArrayRef<FwdRegParamInfo> ParamsToAdd) {
auto I = Worklist.insert({Reg, {}});
auto &ParamsForFwdReg = I.first->second;
for (auto Param : ParamsToAdd) {
assert(none_of(ParamsForFwdReg,
[Param](const FwdRegParamInfo &D) {
return D.ParamReg == Param.ParamReg;
}) &&
"Same parameter described twice by forwarding reg");
// If a parameter's call site value is produced by a chain of
// instructions we may have already created an expression for the
// parameter when walking through the instructions. Append that to the
// new expression.
const DIExpression *CombinedExpr = combineDIExpressions(Expr, Param.Expr);
ParamsForFwdReg.push_back({Param.ParamReg, CombinedExpr});
}
}
/// Interpret values loaded into registers by \p CurMI.
static void interpretValues(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
ParamSet &Params) {
const MachineFunction *MF = CurMI->getMF();
const DIExpression *EmptyExpr =
DIExpression::get(MF->getFunction().getContext(), {});
const auto &TRI = *MF->getSubtarget().getRegisterInfo();
const auto &TII = *MF->getSubtarget().getInstrInfo();
const auto &TLI = *MF->getSubtarget().getTargetLowering();
// If an instruction defines more than one item in the worklist, we may run
// into situations where a worklist register's value is (potentially)
// described by the previous value of another register that is also defined
// by that instruction.
//
// This can for example occur in cases like this:
//
// $r1 = mov 123
// $r0, $r1 = mvrr $r1, 456
// call @foo, $r0, $r1
//
// When describing $r1's value for the mvrr instruction, we need to make sure
// that we don't finalize an entry value for $r0, as that is dependent on the
// previous value of $r1 (123 rather than 456).
//
// In order to not have to distinguish between those cases when finalizing
// entry values, we simply postpone adding new parameter registers to the
// worklist, by first keeping them in this temporary container until the
// instruction has been handled.
FwdRegWorklist TmpWorklistItems;
// If the MI is an instruction defining one or more parameters' forwarding
// registers, add those defines.
auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
SmallSetVector<unsigned, 4> &Defs) {
if (MI.isDebugInstr())
return;
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() &&
Register::isPhysicalRegister(MO.getReg())) {
for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
}
}
};
// Set of worklist registers that are defined by this instruction.
SmallSetVector<unsigned, 4> FwdRegDefs;
getForwardingRegsDefinedByMI(*CurMI, FwdRegDefs);
if (FwdRegDefs.empty())
return;
for (auto ParamFwdReg : FwdRegDefs) {
if (auto ParamValue = TII.describeLoadedValue(*CurMI, ParamFwdReg)) {
if (ParamValue->first.isImm()) {
int64_t Val = ParamValue->first.getImm();
finishCallSiteParams(Val, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else if (ParamValue->first.isReg()) {
Register RegLoc = ParamValue->first.getReg();
Register SP = TLI.getStackPointerRegisterToSaveRestore();
Register FP = TRI.getFrameRegister(*MF);
bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
finishCallSiteParams(MLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else {
// ParamFwdReg was described by the non-callee saved register
// RegLoc. Mark that the call site values for the parameters are
// dependent on that register instead of ParamFwdReg. Since RegLoc
// may be a register that will be handled in this iteration, we
// postpone adding the items to the worklist, and instead keep them
// in a temporary container.
addToFwdRegWorklist(TmpWorklistItems, RegLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg]);
}
}
}
}
// Remove all registers that this instruction defines from the worklist.
for (auto ParamFwdReg : FwdRegDefs)
ForwardedRegWorklist.erase(ParamFwdReg);
// Now that we are done handling this instruction, add items from the
// temporary worklist to the real one.
for (auto &New : TmpWorklistItems)
addToFwdRegWorklist(ForwardedRegWorklist, New.first, EmptyExpr, New.second);
TmpWorklistItems.clear();
}
static bool interpretNextInstr(const MachineInstr *CurMI,
FwdRegWorklist &ForwardedRegWorklist,
ParamSet &Params) {
// Skip bundle headers.
if (CurMI->isBundle())
return true;
// If the next instruction is a call we cannot interpret the parameters'
// forwarding registers, or we have finished interpreting all
// parameters.
if (CurMI->isCall())
return false;
if (ForwardedRegWorklist.empty())
return false;
// Avoid NOP description.
if (CurMI->getNumOperands() == 0)
return true;
interpretValues(CurMI, ForwardedRegWorklist, Params);
return true;
}
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
ParamSet &Params) {
const MachineFunction *MF = CallMI->getMF();
const auto &CalleesMap = MF->getCallSitesInfo();
auto CallFwdRegsInfo = CalleesMap.find(CallMI);
// There is no information for the call instruction.
if (CallFwdRegsInfo == CalleesMap.end())
return;
const MachineBasicBlock *MBB = CallMI->getParent();
// Skip the call instruction.
auto I = std::next(CallMI->getReverseIterator());
FwdRegWorklist ForwardedRegWorklist;
const DIExpression *EmptyExpr =
DIExpression::get(MF->getFunction().getContext(), {});
// Add all the forwarding registers into the ForwardedRegWorklist.
for (const auto &ArgReg : CallFwdRegsInfo->second) {
bool InsertedReg =
ForwardedRegWorklist.insert({ArgReg.Reg, {{ArgReg.Reg, EmptyExpr}}})
.second;
assert(InsertedReg && "Single register used to forward two arguments?");
(void)InsertedReg;
}
// Do not emit CSInfo for undef forwarding registers.
for (auto &MO : CallMI->uses())
if (MO.isReg() && MO.isUndef())
ForwardedRegWorklist.erase(MO.getReg());
// We erase from the ForwardedRegWorklist those forwarding registers for
// which we successfully describe a loaded value (by using
// describeLoadedValue()). For the remaining arguments in the worklist,
// for which no loaded value could be described, we try to generate an
// entry value expression for their call site value description, if the
// call is within the entry MBB.
// TODO: Handle situations where a call site parameter value can be
// described as the entry value within basic blocks other than the first one.
bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
// Search for values loaded into the forwarding registers inside the call delay slot.
if (CallMI->hasDelaySlot()) {
auto Suc = std::next(CallMI->getIterator());
// Only one-instruction delay slot is supported.
auto BundleEnd = llvm::getBundleEnd(CallMI->getIterator());
(void)BundleEnd;
assert(std::next(Suc) == BundleEnd &&
"More than one instruction in call delay slot");
// Try to interpret value loaded by instruction.
if (!interpretNextInstr(&*Suc, ForwardedRegWorklist, Params))
return;
}
// Search for values loaded into the forwarding registers.
for (; I != MBB->rend(); ++I) {
// Try to interpret values loaded by instruction.
if (!interpretNextInstr(&*I, ForwardedRegWorklist, Params))
return;
}
// Emit the call site parameter's value as an entry value.
if (ShouldTryEmitEntryVals) {
// Create an expression where the register's entry value is used.
DIExpression *EntryExpr = DIExpression::get(
MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
for (auto &RegEntry : ForwardedRegWorklist) {
MachineLocation MLoc(RegEntry.first);
finishCallSiteParams(MLoc, EntryExpr, RegEntry.second, Params);
}
}
}
void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
DwarfCompileUnit &CU, DIE &ScopeDIE,
const MachineFunction &MF) {
// Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if
// the subprogram is required to have one.
if (!SP.areAllCallsDescribed() || !SP.isDefinition())
return;
// Use DW_AT_call_all_calls to express that call site entries are present
// for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
// because one of its requirements is not met: call site entries for
// optimized-out calls are elided.
CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "TargetInstrInfo not found: cannot label tail calls");
// Delay slot support check.
auto delaySlotSupported = [&](const MachineInstr &MI) {
if (!MI.isBundledWithSucc())
return false;
auto Suc = std::next(MI.getIterator());
auto CallInstrBundle = getBundleStart(MI.getIterator());
(void)CallInstrBundle;
auto DelaySlotBundle = getBundleStart(Suc);
(void)DelaySlotBundle;
// Ensure that the label after the call follows the delay slot instruction.
// Ex. CALL_INSTRUCTION {
// DELAY_SLOT_INSTRUCTION }
// LABEL_AFTER_CALL
assert(getLabelAfterInsn(&*CallInstrBundle) ==
getLabelAfterInsn(&*DelaySlotBundle) &&
"Call and its successor instruction don't have same label after.");
return true;
};
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB.instrs()) {
// Bundles containing a call will pass the isCall() test below but do not
// have callee operand information, so skip them here. The iterator will
// eventually reach the call MI.
if (MI.isBundle())
continue;
// Skip instructions which aren't calls. Both calls and tail-calling jump
// instructions (e.g. TAILJMPd64) are classified correctly here.
if (!MI.isCandidateForCallSiteEntry())
continue;
// Skip instructions marked as frame setup, as they are not interesting to
// the user.
if (MI.getFlag(MachineInstr::FrameSetup))
continue;
// Check if delay slot support is enabled.
if (MI.hasDelaySlot() && !delaySlotSupported(*&MI))
return;
// If this is a direct call, find the callee's subprogram.
// In the case of an indirect call find the register that holds
// the callee.
const MachineOperand &CalleeOp = TII->getCalleeOperand(MI);
if (!CalleeOp.isGlobal() &&
(!CalleeOp.isReg() ||
!Register::isPhysicalRegister(CalleeOp.getReg())))
continue;
unsigned CallReg = 0;
- DIE *CalleeDIE = nullptr;
+ const DISubprogram *CalleeSP = nullptr;
const Function *CalleeDecl = nullptr;
if (CalleeOp.isReg()) {
CallReg = CalleeOp.getReg();
if (!CallReg)
continue;
} else {
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
- const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
-
- if (CalleeSP->isDefinition()) {
- // Ensure that a subprogram DIE for the callee is available in the
- // appropriate CU.
- CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
- } else {
- // Create the declaration DIE if it is missing. This is required to
- // support compilation of old bitcode with an incomplete list of
- // retained metadata.
- CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
- }
- assert(CalleeDIE && "Must have a DIE for the callee");
+ CalleeSP = CalleeDecl->getSubprogram();
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
bool IsTail = TII->isTailCall(MI);
// If MI is in a bundle, the label was created after the bundle since
// EmitFunctionBody iterates over top-level MIs. Get that top-level MI
// to search for that label below.
const MachineInstr *TopLevelCallMI =
MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
// For non-tail calls, the return PC is needed to disambiguate paths in
// the call graph which could lead to some target function. For tail
// calls, no return PC information is needed, unless tuning for GDB in
// DWARF4 mode in which case we fake a return PC for compatibility.
const MCSymbol *PCAddr =
(!IsTail || CU.useGNUAnalogForDwarf5Feature())
? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
: nullptr;
// For tail calls, it's necessary to record the address of the branch
// instruction so that the debugger can show where the tail call occurred.
const MCSymbol *CallAddr =
IsTail ? getLabelBeforeInsn(TopLevelCallMI) : nullptr;
assert((IsTail || PCAddr) && "Non-tail call without return PC");
LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
<< (CalleeDecl ? CalleeDecl->getName()
: StringRef(MF.getSubtarget()
.getRegisterInfo()
->getName(CallReg)))
<< (IsTail ? " [IsTail]" : "") << "\n");
DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
- ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
+ ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
// Optionally emit call-site-param debug info.
if (emitDebugEntryValues()) {
ParamSet Params;
// Try to interpret values of call site parameters.
collectCallSiteParameters(&MI, Params);
CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params);
}
}
}
}
void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
if (!U.hasDwarfPubSections())
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU) {
DIE &Die = NewCU.getUnitDie();
StringRef FN = DIUnit->getFilename();
StringRef Producer = DIUnit->getProducer();
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty() && !useAppleExtensionAttributes()) {
std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
} else
NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
StringRef SysRoot = DIUnit->getSysRoot();
if (!SysRoot.empty())
NewCU.addString(Die, dwarf::DW_AT_LLVM_sysroot, SysRoot);
StringRef SDK = DIUnit->getSDK();
if (!SDK.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
// Add DW_AT_str_offsets_base to the unit DIE, except for split units.
if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
NewCU.addStringOffsetsStart();
if (!useSplitDwarf()) {
NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
if (!CompilationDir.empty())
NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(NewCU, Die);
}
if (useAppleExtensionAttributes()) {
if (DIUnit->isOptimized())
NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
StringRef Flags = DIUnit->getFlags();
if (!Flags.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
if (unsigned RVer = DIUnit->getRuntimeVersion())
NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
}
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
DIUnit->getDWOId());
if (!DIUnit->getSplitDebugFilename().empty()) {
// This is a prefabricated skeleton CU.
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename());
}
}
}
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (auto *CU = CUMap.lookup(DIUnit))
return *CU;
CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
for (auto *IE : DIUnit->getImportedEntities())
NewCU.addImportedEntity(IE);
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
Asm->OutStreamer->emitDwarfFile0Directive(
CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()),
DIUnit->getSource(), NewCU.getUniqueID());
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
} else {
finishUnitAttributes(DIUnit, NewCU);
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
+ // Create DIEs for function declarations used for call site debug info.
+ for (auto Scope : DIUnit->getRetainedTypes())
+ if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+ NewCU.getOrCreateSubprogramDIE(SP);
+
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
}
void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N) {
if (isa<DILocalScope>(N->getScope()))
return;
if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
D->addChild(TheCU.constructImportedEntityDIE(N));
}
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
llvm::sort(
GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
// Sort order: first null exprs, then exprs without fragment
// info, then sort by fragment offset in bits.
// FIXME: Come up with a more comprehensive comparator so
// the sorting isn't non-deterministic, and so the following
// std::unique call works correctly.
if (!A.Expr || !B.Expr)
return !!B.Expr;
auto FragmentA = A.Expr->getFragmentInfo();
auto FragmentB = B.Expr->getFragmentInfo();
if (!FragmentA || !FragmentB)
return !!FragmentB;
return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
});
GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
[](DwarfCompileUnit::GlobalExpr A,
DwarfCompileUnit::GlobalExpr B) {
return A.Expr == B.Expr;
}),
GVEs.end());
return GVEs;
}
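// For example, the expression list { fragment(32, 32), <null>, fragment(0, 32) }
// is reordered to { <null>, fragment(0, 32), fragment(32, 32) } before
// duplicates (compared by expression pointer identity) are dropped.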
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule(Module *M) {
DebugHandlerBase::beginModule(M);
if (!Asm || !MMI->hasDebugInfo())
return;
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
assert(MMI->hasDebugInfo() &&
"DebugInfoAvailabilty unexpectedly not initialized");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
for (const GlobalVariable &Global : M->globals()) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
Global.getDebugInfo(GVs);
for (auto *GVE : GVs)
GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
}
// Create the symbol that designates the start of the unit's contribution
// to the string offsets table. In a split DWARF scenario, only the skeleton
// unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol).
if (useSegmentedStringOffsetsTable())
(useSplitDwarf() ? SkeletonHolder : InfoHolder)
.setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
// Create the symbols that designate the start of the DWARF v5 range list
// and locations list tables. They are located past the table headers.
if (getDwarfVersion() >= 5) {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_table_base"));
if (useSplitDwarf())
InfoHolder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_dwo_table_base"));
}
// Create the symbol that points to the first entry following the debug
// address table (.debug_addr) header.
AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
// subprogram, then this search won't be needed and a
// getImportedEntities().empty() test should go below with the rest.
bool HasNonLocalImportedEntities = llvm::any_of(
CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
return !isa<DILocalScope>(IE->getScope());
});
if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
CUNode->getRetainedTypes().empty() &&
CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
continue;
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
// Global Variables.
for (auto *GVE : CUNode->getGlobalVariables()) {
// Don't bother adding DIGlobalVariableExpressions listed in the CU if we
// already know about the variable and it isn't adding a constant
// expression.
auto &GVMapEntry = GVMap[GVE->getVariable()];
auto *Expr = GVE->getExpression();
if (!GVMapEntry.size() || (Expr && Expr->isConstant()))
GVMapEntry.push_back({nullptr, Expr});
}
DenseSet<DIGlobalVariable *> Processed;
for (auto *GVE : CUNode->getGlobalVariables()) {
DIGlobalVariable *GV = GVE->getVariable();
if (Processed.insert(GV).second)
CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
}
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
CU.getOrCreateTypeDIE(cast<DIType>(Ty));
}
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
for (auto *IE : CUNode->getImportedEntities())
constructAndAddImportedEntityDIE(CU, IE);
}
}
void DwarfDebug::finishEntityDefinitions() {
for (const auto &Entity : ConcreteEntities) {
DIE *Die = Entity->getDIE();
assert(Die);
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteEntities list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie());
assert(Unit);
Unit->finishEntityDefinition(Entity.get());
}
}
void DwarfDebug::finishSubprogramDefinitions() {
for (const DISubprogram *SP : ProcessedSPNodes) {
assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug);
forBothCUs(
getOrCreateDwarfCompileUnit(SP->getUnit()),
[&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); });
}
}
void DwarfDebug::finalizeModuleInfo() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
finishSubprogramDefinitions();
finishEntityDefinitions();
// Include the DWO file name in the hash if there's more than one CU.
// This handles ThinLTO's situation where imported CUs may very easily be
// duplicates of the same CU partially imported into another ThinLTO unit.
StringRef DWOName;
if (CUMap.size() > 1)
DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
if (TheCU.getCUNode()->isDebugDirectivesOnly())
continue;
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheCU.constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
if (HasSplitUnit) {
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
finishUnitAttributes(TheCU.getCUNode(), TheCU);
TheCU.addString(TheCU.getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
SkCU->addString(SkCU->getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie());
if (getDwarfVersion() >= 5) {
TheCU.setDWOId(ID);
SkCU->setDWOId(ID);
} else {
TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
}
if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) {
const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
Sym, Sym);
}
} else if (SkCU) {
finishUnitAttributes(SkCU->getCUNode(), *SkCU);
}
// If we have code split among multiple sections or non-contiguous
// ranges of code then emit a DW_AT_ranges attribute on the unit that will
// remain in the .o file, otherwise add a DW_AT_low_pc.
// FIXME: We should use ranges to allow reordering of code, a la
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
// DW_AT_ranges to specify the default base address for use in
// location lists (see Section 2.6.2) and range lists (see Section
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
U.setBaseAddress(TheCU.getRanges().front().Begin);
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
if ((HasSplitUnit || getDwarfVersion() >= 5) && !AddrPool.isEmpty())
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
if (!DebugLocs.getLists().empty()) {
if (!useSplitDwarf())
U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
DebugLocs.getSym(),
TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
auto *CUNode = cast<DICompileUnit>(P.first);
// If the compile unit has macros, emit "DW_AT_macro_info/DW_AT_macros"
// attribute.
if (CUNode->getMacros()) {
if (UseDebugMacroSection) {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
else {
dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5
? dwarf::DW_AT_macros
: dwarf::DW_AT_GNU_macros;
U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(),
TLOF.getDwarfMacroSection()->getBeginSymbol());
}
} else {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
U.getMacroLabelBegin(),
TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
else
U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
U.getMacroLabelBegin(),
TLOF.getDwarfMacinfoSection()->getBeginSymbol());
}
}
}
// Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
for (auto *CUNode : MMI->getModule()->debug_compile_units())
if (CUNode->getDWOId())
getOrCreateDwarfCompileUnit(CUNode);
// Compute DIE offsets and sizes.
InfoHolder.computeSizeAndOffsets();
if (useSplitDwarf())
SkeletonHolder.computeSizeAndOffsets();
}
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
for (const auto &P : CUMap) {
auto &CU = *P.second;
CU.createBaseTypeDIEs();
}
// If we aren't actually generating debug info (see the check in beginModule,
// conditionalized on the presence of the llvm.dbg.cu metadata node), bail out.
if (!Asm || !MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
finalizeModuleInfo();
if (useSplitDwarf())
// Emit debug_loc.dwo/debug_loclists.dwo section.
emitDebugLocDWO();
else
// Emit debug_loc/debug_loclists section.
emitDebugLoc();
// Emit the corresponding abbreviations into an abbrev section.
emitAbbreviations();
// Emit all the DIEs into a debug info section.
emitDebugInfo();
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
// Emit info into a debug ranges section.
emitDebugRanges();
if (useSplitDwarf())
// Emit info into a debug macinfo.dwo section.
emitDebugMacinfoDWO();
else
// Emit info into a debug macinfo/macro section.
emitDebugMacinfo();
emitDebugStr();
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
emitDebugRangesDWO();
}
emitDebugAddr();
// Emit info into the dwarf accelerator table sections.
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
emitAccelNames();
emitAccelObjC();
emitAccelNamespaces();
emitAccelTypes();
break;
case AccelTableKind::Dwarf:
emitAccelDebugNames();
break;
case AccelTableKind::None:
break;
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
}
// Emit the pubnames and pubtypes sections if requested.
emitDebugPubSections();
// clean up.
// FIXME: AbstractVariables.clear();
}
void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
return;
CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node, const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
CU.createAbstractEntity(Node, Scope);
}
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
SmallDenseMap<InlinedEntity, DbgVariable *> MFVars;
LLVM_DEBUG(dbgs() << "DwarfDebug: collecting variables from MF side table\n");
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
"Expected inlined-at fields to agree");
InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt());
Processed.insert(Var);
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
if (!Scope) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << VI.Var->getName()
<< ", no variable scope found\n");
continue;
}
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
if (DbgVariable *DbgVar = MFVars.lookup(Var))
DbgVar->addMMIEntry(*RegVar);
else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
ConcreteEntities.push_back(std::move(RegVar));
}
}
}
/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
/// enclosing lexical scope. The check ensures there are no other instructions
/// in the same lexical scope preceding the DBG_VALUE and that its range is
/// either open or otherwise rolls off the end of the scope.
static bool validThroughout(LexicalScopes &LScopes,
const MachineInstr *DbgValue,
const MachineInstr *RangeEnd,
const InstructionOrdering &Ordering) {
assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
auto MBB = DbgValue->getParent();
auto DL = DbgValue->getDebugLoc();
auto *LScope = LScopes.findLexicalScope(DL);
// Scope doesn't exist; this is a dead DBG_VALUE.
if (!LScope)
return false;
auto &LSRange = LScope->getRanges();
if (LSRange.size() == 0)
return false;
const MachineInstr *LScopeBegin = LSRange.front().first;
// If the scope starts before the DBG_VALUE then we may have a negative
// result. Otherwise the location is live coming into the scope and we
// can skip the following checks.
if (!Ordering.isBefore(DbgValue, LScopeBegin)) {
// Exit if the lexical scope begins outside of the current block.
if (LScopeBegin->getParent() != MBB)
return false;
MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
for (++Pred; Pred != MBB->rend(); ++Pred) {
if (Pred->getFlag(MachineInstr::FrameSetup))
break;
auto PredDL = Pred->getDebugLoc();
if (!PredDL || Pred->isMetaInstruction())
continue;
// Check whether the instruction preceding the DBG_VALUE is in the same
// (sub)scope as the DBG_VALUE.
if (DL->getScope() == PredDL->getScope())
return false;
auto *PredScope = LScopes.findLexicalScope(PredDL);
if (!PredScope || LScope->dominates(PredScope))
return false;
}
}
// If the range of the DBG_VALUE is open-ended, report success.
if (!RangeEnd)
return true;
// Single, constant DBG_VALUEs in the prologue are promoted to be live
// throughout the function. This is a hack, presumably for DWARF v2 and not
// necessarily correct. It would be much better to use a dbg.declare instead
// if we know the constant is live throughout the scope.
if (MBB->pred_empty() &&
all_of(DbgValue->debug_operands(),
[](const MachineOperand &Op) { return Op.isImm(); }))
return true;
// Test if the location terminates before the end of the scope.
const MachineInstr *LScopeEnd = LSRange.back().second;
if (Ordering.isBefore(RangeEnd, LScopeEnd))
return false;
// There's a single location which starts at the scope start, and ends at or
// after the scope end.
return true;
}
/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. The resulting DebugLocEntries will have
/// strictly monotonically increasing begin addresses and will never
/// overlap. If the resulting list has only one entry that is valid
/// throughout the variable's scope, return true.
//
// See the definition of DbgValueHistoryMap::Entry for an explanation of the
// different kinds of history map entries. One thing to be aware of is that if
// a debug value is ended by another entry (rather than being valid until the
// end of the function), that entry's instruction may or may not be included in
// the range, depending on if the entry is a clobbering entry (it has an
// instruction that clobbers one or more preceding locations), or if it is an
// (overlapping) debug value entry. This distinction can be seen in the example
// below. The first debug value is ended by the clobbering entry 2, and the
// second and third debug values are ended by the overlapping debug value entry
// 4.
//
// Input:
//
// History map entries [type, end index, mi]
//
// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
// 2 | | [Clobber, $reg0 = [...], -, -]
// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
//
// Output [start, end) [Value...]:
//
// [0-1) [(reg0, fragment 0, 32)]
// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
// [4-) [(@g, fragment 0, 96)]
bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::Entries &Entries) {
using OpenRange =
std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
SmallVector<OpenRange, 4> OpenRanges;
bool isSafeForSingleLocation = true;
const MachineInstr *StartDebugMI = nullptr;
const MachineInstr *EndMI = nullptr;
for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
const MachineInstr *Instr = EI->getInstr();
// Remove all values that are no longer live.
size_t Index = std::distance(EB, EI);
erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
// If we are dealing with a clobbering entry, this iteration will result in
// a location list entry starting after the clobbering instruction.
const MCSymbol *StartLabel =
EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
assert(StartLabel &&
"Forgot label before/after instruction starting a range!");
const MCSymbol *EndLabel;
if (std::next(EI) == Entries.end()) {
const MachineBasicBlock &EndMBB = Asm->MF->back();
EndLabel = Asm->MBBSectionRanges[EndMBB.getSectionIDNum()].EndLabel;
if (EI->isClobber())
EndMI = EI->getInstr();
}
else if (std::next(EI)->isClobber())
EndLabel = getLabelAfterInsn(std::next(EI)->getInstr());
else
EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
assert(EndLabel && "Forgot label after instruction ending a range!");
if (EI->isDbgValue())
LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
// If this history map entry has a debug value, add that to the list of
// open ranges and check if its location is valid for a single value
// location.
if (EI->isDbgValue()) {
// Do not add undef debug values, as they are redundant information in
// the location list entries. An undef debug value results in an empty
// location description. If there are any non-undef fragments then padding pieces
// with empty location descriptions will automatically be inserted, and if
// all fragments are undef then the whole location list entry is
// redundant.
if (!Instr->isUndefDebugValue()) {
auto Value = getDebugLocValue(Instr);
OpenRanges.emplace_back(EI->getEndIndex(), Value);
// TODO: Add support for single value fragment locations.
if (Instr->getDebugExpression()->isFragment())
isSafeForSingleLocation = false;
if (!StartDebugMI)
StartDebugMI = Instr;
} else {
isSafeForSingleLocation = false;
}
}
// Location list entries with empty location descriptions are redundant
// information in DWARF, so do not emit those.
if (OpenRanges.empty())
continue;
// Omit entries with empty ranges as they do not have any effect in DWARF.
if (StartLabel == EndLabel) {
LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
continue;
}
SmallVector<DbgValueLoc, 4> Values;
for (auto &R : OpenRanges)
Values.push_back(R.second);
// With basic block sections, it is possible that the StartLabel and the
// Instr are not in the same section. This happens when the StartLabel is
// the function begin label and the dbg value appears in a basic block
// that is not the entry. In this case, the range needs to be split to
// span each individual section in the range from StartLabel to EndLabel.
if (Asm->MF->hasBBSections() && StartLabel == Asm->getFunctionBegin() &&
!Instr->getParent()->sameSection(&Asm->MF->front())) {
const MCSymbol *BeginSectionLabel = StartLabel;
for (const MachineBasicBlock &MBB : *Asm->MF) {
if (MBB.isBeginSection() && &MBB != &Asm->MF->front())
BeginSectionLabel = MBB.getSymbol();
if (MBB.sameSection(Instr->getParent())) {
DebugLoc.emplace_back(BeginSectionLabel, EndLabel, Values);
break;
}
if (MBB.isEndSection())
DebugLoc.emplace_back(BeginSectionLabel, MBB.getEndSymbol(), Values);
}
} else {
DebugLoc.emplace_back(StartLabel, EndLabel, Values);
}
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
auto CurEntry = DebugLoc.rbegin();
LLVM_DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
for (auto &Value : CurEntry->getValues())
Value.dump();
dbgs() << "-----\n";
});
auto PrevEntry = std::next(CurEntry);
if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
DebugLoc.pop_back();
}
if (!isSafeForSingleLocation ||
!validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering()))
return false;
if (DebugLoc.size() == 1)
return true;
if (!Asm->MF->hasBBSections())
return false;
// Check here to see if the loclist can be merged into a single range. If not,
// we must keep the split loclists per section. This does exactly what
// MergeRanges does without sections. We don't actually merge the ranges
// as the split ranges must be kept intact if this cannot be collapsed
// into a single range.
const MachineBasicBlock *RangeMBB = nullptr;
if (DebugLoc[0].getBeginSym() == Asm->getFunctionBegin())
RangeMBB = &Asm->MF->front();
else
RangeMBB = Entries.begin()->getInstr()->getParent();
auto *CurEntry = DebugLoc.begin();
auto *NextEntry = std::next(CurEntry);
while (NextEntry != DebugLoc.end()) {
// Get the last machine basic block of this section.
while (!RangeMBB->isEndSection())
RangeMBB = RangeMBB->getNextNode();
if (!RangeMBB->getNextNode())
return false;
// CurEntry should end the current section and NextEntry should start
// the next section and the Values must match for these two ranges to be
// merged.
if (CurEntry->getEndSym() != RangeMBB->getEndSymbol() ||
NextEntry->getBeginSym() != RangeMBB->getNextNode()->getSymbol() ||
CurEntry->getValues() != NextEntry->getValues())
return false;
RangeMBB = RangeMBB->getNextNode();
CurEntry = NextEntry;
NextEntry = std::next(CurEntry);
}
return true;
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
LexicalScope &Scope,
const DINode *Node,
const DILocation *Location,
const MCSymbol *Sym) {
ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
if (isa<const DILocalVariable>(Node)) {
ConcreteEntities.push_back(
std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
Location));
InfoHolder.addScopeVariable(&Scope,
cast<DbgVariable>(ConcreteEntities.back().get()));
} else if (isa<const DILabel>(Node)) {
ConcreteEntities.push_back(
std::make_unique<DbgLabel>(cast<const DILabel>(Node),
Location, Sym));
InfoHolder.addScopeLabel(&Scope,
cast<DbgLabel>(ConcreteEntities.back().get()));
}
return ConcreteEntities.back().get();
}
// Find variables for each lexical scope.
void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedEntity> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
InlinedEntity IV = I.first;
if (Processed.count(IV))
continue;
// Instruction ranges, specifying where IV is accessible.
const auto &HistoryMapEntries = I.second;
// Try to find any non-empty variable location. Do not create a concrete
// entity if there are no locations.
if (!DbgValues.hasNonEmptyLocation(HistoryMapEntries))
continue;
LexicalScope *Scope = nullptr;
const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first);
if (const DILocation *IA = IV.second)
Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA);
else
Scope = LScopes.findLexicalScope(LocalVar->getScope());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(IV);
DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
*Scope, LocalVar, IV.second));
const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
assert(MInsn->isDebugValue() && "History must begin with debug value");
// Check if there is a single DBG_VALUE, valid throughout the var's scope.
// If the history map contains a single debug value, there may be an
// additional entry which clobbers the debug value.
size_t HistSize = HistoryMapEntries.size();
bool SingleValueWithClobber =
HistSize == 2 && HistoryMapEntries[1].isClobber();
if (HistSize == 1 || SingleValueWithClobber) {
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
RegVar->initializeDbgValue(MInsn);
continue;
}
}
// Do not emit location lists if the .debug_loc section is disabled.
if (!useLocSection())
continue;
// Handle multiple DBG_VALUE instructions describing one variable.
DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
// Check whether buildLocationList managed to merge all locations to one
// that is valid throughout the variable's scope. If so, produce single
// value location.
if (isValidSingleLocation) {
RegVar->initializeDbgValue(Entries[0].getValues()[0]);
continue;
}
// If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
static_cast<const Metadata *>(LocalVar->getType()));
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
Entry.finalize(*Asm, List, BT, TheCU);
}
// For each InlinedEntity collected from DBG_LABEL instructions, convert to
// DWARF-related DbgLabel.
for (const auto &I : DbgLabels) {
InlinedEntity IL = I.first;
const MachineInstr *MI = I.second;
if (MI == nullptr)
continue;
LexicalScope *Scope = nullptr;
const DILabel *Label = cast<DILabel>(IL.first);
// The scope could have an extra lexical block file.
const DILocalScope *LocalScope =
Label->getScope()->getNonLexicalBlockFileScope();
// Get the inlined DILocation if this is an inlined label.
if (const DILocation *IA = IL.second)
Scope = LScopes.findInlinedScope(LocalScope, IA);
else
Scope = LScopes.findLexicalScope(LocalScope);
// If label scope is not found then skip this label.
if (!Scope)
continue;
Processed.insert(IL);
/// At this point, the temporary label is created.
/// Save the temporary label in the DbgLabel entity so we can emit the
/// actual address when generating the DWARF DIE.
MCSymbol *Sym = getLabelBeforeInsn(MI);
createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
}
// Collect info for variables/labels that were optimized out.
for (const DINode *DN : SP->getRetainedNodes()) {
if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
continue;
LexicalScope *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
Scope = LScopes.findLexicalScope(DV->getScope());
} else if (auto *DL = dyn_cast<DILabel>(DN)) {
Scope = LScopes.findLexicalScope(DL->getScope());
}
if (Scope)
createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
const MachineFunction &MF = *MI->getMF();
const auto *SP = MF.getFunction().getSubprogram();
bool NoDebug =
!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug;
// Delay slot support check.
auto delaySlotSupported = [](const MachineInstr &MI) {
if (!MI.isBundledWithSucc())
return false;
auto Suc = std::next(MI.getIterator());
(void)Suc;
// Ensure that the delay-slot instruction is a successor of the call instruction.
// Ex. CALL_INSTRUCTION {
// DELAY_SLOT_INSTRUCTION }
assert(Suc->isBundledWithPred() &&
"Call bundle instructions are out of order");
return true;
};
// When describing calls, we need a label for the call instruction.
if (!NoDebug && SP->areAllCallsDescribed() &&
MI->isCandidateForCallSiteEntry(MachineInstr::AnyInBundle) &&
(!MI->hasDelaySlot() || delaySlotSupported(*MI))) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool IsTail = TII->isTailCall(*MI);
// For tail calls, we need the address of the branch instruction for
// DW_AT_call_pc.
if (IsTail)
requestLabelBeforeInsn(MI);
// For non-tail calls, we need the return address for the call for
// DW_AT_call_return_pc. Under GDB tuning, this information is needed for
// tail calls as well.
requestLabelAfterInsn(MI);
}
DebugHandlerBase::beginInstruction(MI);
if (!CurMI)
return;
if (NoDebug)
return;
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
// If the instruction is part of the function frame setup code, do not emit
// any line record, as there is no correspondence with any user code.
if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
// the last line number actually emitted, to see if it was line 0.
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
if (DL == PrevInstLoc) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
// We have an explicit location, same as the previous location.
// But we might be coming back to it after a line 0 record.
if (LastAsmLine == 0 && DL.getLine() != 0) {
// Reinstate the source location but not marked as a statement.
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
}
return;
}
if (!DL) {
// We have an unspecified location, which might want to be line 0.
// If we have already emitted a line-0 record, don't repeat it.
if (LastAsmLine == 0)
return;
// If user said Don't Do That, don't do that.
if (UnknownLocations == Disable)
return;
// See if we have a reason to emit a line-0 record now.
// Reasons to emit a line-0 record include:
// - User asked for it (UnknownLocations).
// - Instruction has a label, so it's referenced from somewhere else,
// possibly debug information; we want it to have a source location.
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
(PrevInstBB && PrevInstBB != MI->getParent())) {
// Preserve the file and column numbers, if we can, to save space in
// the encoded line table.
// Do not update PrevInstLoc; it remembers the last non-0 line.
const MDNode *Scope = nullptr;
unsigned Column = 0;
if (PrevInstLoc) {
Scope = PrevInstLoc.getScope();
Column = PrevInstLoc.getCol();
}
recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
}
return;
}
// We have an explicit location, different from the previous location.
// Don't repeat a line-0 record, but otherwise emit the new location.
// (The new location might be an explicit line 0, which we do emit.)
if (DL.getLine() == 0 && LastAsmLine == 0)
return;
unsigned Flags = 0;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
PrologEndLoc = DebugLoc();
}
// If the line changed, we call that a new statement; unless we went to
// line 0 and came back, in which case it is not a new statement.
unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
if (DL.getLine() && DL.getLine() != OldLine)
Flags |= DWARF2_FLAG_IS_STMT;
const MDNode *Scope = DL.getScope();
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
// If we're not at line 0, remember this location.
if (DL.getLine())
PrevInstLoc = DL;
}
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc())
return MI.getDebugLoc();
return DebugLoc();
}
/// Register a source line with debug info. Returns the unique label that was
/// emitted and which provides correspondence to the source line list.
static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
const MDNode *S, unsigned Flags, unsigned CUID,
uint16_t DwarfVersion,
ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
StringRef Fn;
unsigned FileNo = 1;
unsigned Discriminator = 0;
if (auto *Scope = cast_or_null<DIScope>(S)) {
Fn = Scope->getFilename();
if (Line != 0 && DwarfVersion >= 4)
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
Discriminator = LBF->getDiscriminator();
FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
.getOrCreateSourceID(Scope->getFile());
}
Asm.OutStreamer->emitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
Discriminator, Fn);
}
DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
unsigned CUID) {
// Get beginning of function.
if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
// Ensure the compile unit is created if the function is called before
// beginFunction().
(void)getOrCreateDwarfCompileUnit(
MF.getFunction().getSubprogram()->getUnit());
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
CUID, getDwarfVersion(), getUnits());
return PrologEndLoc;
}
return DebugLoc();
}
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
auto *SP = MF->getFunction().getSubprogram();
assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
PrologEndLoc = emitInitialLocDirective(
*MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// CU to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
}
// Gather and emit post-function debug information.
void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
const DISubprogram *SP = MF->getFunction().getSubprogram();
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
// Set DwarfCompileUnitID in MCContext back to its default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
PrevLabel = nullptr;
CurFn = nullptr;
return;
}
DenseSet<InlinedEntity> Processed;
collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
// With basic block sections, add ranges for all basic block sections.
for (const auto &R : Asm->MBBSectionRanges)
TheCU.addRange({R.second.BeginLabel, R.second.EndLabel});
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
// is still needed as we need its source location.
if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
PrevLabel = nullptr;
CurFn = nullptr;
return;
}
#ifndef NDEBUG
size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
continue;
const MDNode *Scope = nullptr;
if (auto *DV = dyn_cast<DILocalVariable>(DN))
Scope = DV->getScope();
else if (auto *DL = dyn_cast<DILabel>(DN))
Scope = DL->getScope();
else
llvm_unreachable("Unexpected DI type!");
// Collect info for variables/labels that were optimized out.
ensureAbstractEntityIsCreated(TheCU, DN, Scope);
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractEntityIsCreated inserted abstract scopes");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
if (!LScopes.getAbstractScopesList().empty() &&
TheCU.getCUNode()->getSplitDebugInlining())
SkelCU->constructSubprogramScopeDIE(SP, FnScope);
// Construct call site entries.
constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
InfoHolder.getScopeLabels().clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
// Register a source line with debug info. Returns the unique label that was
// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
::recordSourceLine(*Asm, Line, Col, S, Flags,
Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
getDwarfVersion(), getUnits());
}
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
void DwarfDebug::emitAbbreviations() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
void DwarfDebug::emitStringOffsetsTableHeader() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.getStringPool().emitStringOffsetsTableHeader(
*Asm, Asm->getObjFileLowering().getDwarfStrOffSection(),
Holder.getStringOffsetsStartSym());
}
template <typename AccelTableT>
void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
StringRef TableName) {
Asm->OutStreamer->SwitchSection(Section);
// Emit the full data.
emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
}
void DwarfDebug::emitAccelDebugNames() {
// Don't emit anything if we have no compilation units to index.
if (getUnits().empty())
return;
emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
"Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
"ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
"namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
"types");
}
// Public name handling.
// The format for the various pubnames:
//
// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
// for the DIE that is named.
//
// gnu pubnames - offset/index value/name tuples where the offset is the offset
// into the CU and the index value is computed according to the type of value
// for the DIE that is named.
//
// For type units the offset is the offset of the skeleton DIE. For split dwarf
// it's the offset within the debug_info/debug_types dwo section, however, the
// reference in the pubname header doesn't change.
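//
// As an illustrative sketch (mirroring emitDebugPubSection below, not a
// normative layout): after the header, each entry is the DIE offset in the
// unit's length-or-offset size, then - for GNU style only - a one-byte
// kind/linkage descriptor, then the NUL-terminated name; the list ends with
// an offset of 0.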
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
// Entities that ended up only in a Type Unit reference the CU instead (since
// the pub entry has offsets within the CU there's no real offset that can be
// provided anyway). As it happens all such entities (namespaces and types,
// types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
// not to be true it would be necessary to persist this information from the
// point at which the entry is added to the index data structure - since by
// the time the index is built from that, the original type/namespace DIE in a
// type unit has already been destroyed so it can't be queried for properties
// like tag, etc.
if (Die->getTag() == dwarf::DW_TAG_compile_unit)
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has most of our knowledge;
// look for that now.
if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
if (SpecDIE.findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
} else if (Die->findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
switch (Die->getTag()) {
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_enumeration_type:
return dwarf::PubIndexEntryDescriptor(
dwarf::GIEK_TYPE,
dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
? dwarf::GIEL_EXTERNAL
: dwarf::GIEL_STATIC);
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
case dwarf::DW_TAG_namespace:
return dwarf::GIEK_TYPE;
case dwarf::DW_TAG_subprogram:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
case dwarf::DW_TAG_variable:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
case dwarf::DW_TAG_enumerator:
return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
dwarf::GIEL_STATIC);
default:
return dwarf::GIEK_NONE;
}
}
/// emitDebugPubSections - Emit visible names and types into debug pubnames and
/// pubtypes sections.
void DwarfDebug::emitDebugPubSections() {
for (const auto &NU : CUMap) {
DwarfCompileUnit *TheU = NU.second;
if (!TheU->hasDwarfPubSections())
continue;
bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
DICompileUnit::DebugNameTableKind::GNU;
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection());
emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
Asm->OutStreamer->SwitchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection());
emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
}
}
void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) {
if (useSectionsAsReferences())
Asm->emitDwarfOffset(CU.getSection()->getBeginSymbol(),
CU.getDebugSectionOffset());
else
Asm->emitDwarfSymbolReference(CU.getLabelBegin());
}
void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
DwarfCompileUnit *TheU,
const StringMap<const DIE *> &Globals) {
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
// Emit the header.
MCSymbol *EndLabel = Asm->emitDwarfUnitLength(
"pub" + Name, "Length of Public " + Name + " Info");
Asm->OutStreamer->AddComment("DWARF Version");
Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
emitSectionReference(*TheU);
Asm->OutStreamer->AddComment("Compilation Unit Length");
Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
const char *Name = GI.getKeyData();
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
Asm->emitDwarfLengthOrOffset(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
Asm->OutStreamer->AddComment(
Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) +
", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
Asm->emitInt8(Desc.toBits());
}
Asm->OutStreamer->AddComment("External Name");
Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
Asm->emitDwarfLengthOrOffset(0);
Asm->OutStreamer->emitLabel(EndLabel);
}
/// Emit null-terminated strings into a debug str section.
void DwarfDebug::emitDebugStr() {
MCSection *StringOffsetsSection = nullptr;
if (useSegmentedStringOffsetsTable()) {
emitStringOffsetsTableHeader();
StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection();
}
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(),
StringOffsetsSection, /* UseRelativeOffsets = */ true);
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
// The expressions are inserted into a byte stream rather early (see
// DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that
// need to reference a base_type DIE the offset of that DIE is not yet known.
// To deal with this we instead insert a placeholder early and then extract
// it here and replace it with the real reference.
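// Concretely, the placeholder is a ULEB128 padded to ULEB128PadSize bytes, so
// the real base-type DIE offset is re-emitted below with the same padding and
// the byte offsets of the surrounding operands remain valid.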
unsigned PtrSize = Asm->MAI->getCodePointerSize();
DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
DebugLocs.getBytes(Entry).size()),
Asm->getDataLayout().isLittleEndian(), PtrSize);
DWARFExpression Expr(Data, PtrSize, Asm->OutContext.getDwarfFormat());
using Encoding = DWARFExpression::Operation::Encoding;
uint64_t Offset = 0;
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
for (unsigned I = 0; I < 2; ++I) {
if (Op.getDescription().Op[I] == Encoding::SizeNA)
continue;
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
uint64_t Offset =
CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
Streamer.emitULEB128(Offset, "", ULEB128PadSize);
// Make sure comments stay aligned.
for (unsigned J = 0; J < ULEB128PadSize; ++J)
if (Comment != End)
Comment++;
} else {
for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
}
assert(Offset == Op.getEndOffset());
}
}
void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
// If the DIExpr is an Entry Value, we want to follow the same code path
// regardless of whether the DBG_VALUE is variadic or not.
if (DIExpr && DIExpr->isEntryValue()) {
// Entry values can only be a single register with no additional DIExpr,
// so just add it directly.
assert(Value.getLocEntries().size() == 1);
assert(Value.getLocEntries()[0].isLocation());
MachineLocation Location = Value.getLocEntries()[0].getLoc();
DwarfExpr.setLocation(Location, DIExpr);
DwarfExpr.beginEntryValueExpression(ExprCursor);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, ExprCursor, Location.getReg()))
return;
return DwarfExpr.addExpression(std::move(ExprCursor));
}
// Regular entry.
auto EmitValueLocEntry = [&DwarfExpr, &BT,
&AP](const DbgValueLocEntry &Entry,
DIExpressionCursor &Cursor) -> bool {
if (Entry.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.addSignedConstant(Entry.getInt());
else
DwarfExpr.addUnsignedConstant(Entry.getInt());
} else if (Entry.isLocation()) {
MachineLocation Location = Entry.getLoc();
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return false;
} else if (Entry.isTargetIndexLocation()) {
TargetIndexLocation Loc = Entry.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the
// WebAssembly-specific encoding is supported.
assert(AP.TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
} else if (Entry.isConstantFP()) {
if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
!Cursor) {
DwarfExpr.addConstantFP(Entry.getConstantFP()->getValueAPF(), AP);
} else if (Entry.getConstantFP()
->getValueAPF()
.bitcastToAPInt()
.getBitWidth() <= 64 /*bits*/) {
DwarfExpr.addUnsignedConstant(
Entry.getConstantFP()->getValueAPF().bitcastToAPInt());
} else {
LLVM_DEBUG(
dbgs() << "Skipped DwarfExpression creation for ConstantFP of size"
<< Entry.getConstantFP()
->getValueAPF()
.bitcastToAPInt()
.getBitWidth()
<< " bits\n");
return false;
}
}
return true;
};
if (!Value.isVariadic()) {
if (!EmitValueLocEntry(Value.getLocEntries()[0], ExprCursor))
return;
DwarfExpr.addExpression(std::move(ExprCursor));
return;
}
// If any of the location entries are registers with the value 0, then the
// location is undefined.
if (any_of(Value.getLocEntries(), [](const DbgValueLocEntry &Entry) {
return Entry.isLocation() && !Entry.getLoc().getReg();
}))
return;
DwarfExpr.addExpression(
std::move(ExprCursor),
[EmitValueLocEntry, &Value](unsigned Idx,
DIExpressionCursor &Cursor) -> bool {
return EmitValueLocEntry(Value.getLocEntries()[Idx], Cursor);
});
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
const DIBasicType *BT,
DwarfCompileUnit &TheCU) {
assert(!Values.empty() &&
"location list entries without values are redundant");
assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
const DbgValueLoc &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
assert(llvm::all_of(Values, [](DbgValueLoc P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
for (const auto &Fragment : Values)
DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
if (DwarfExpr.TagOffset)
List.setTagOffset(*DwarfExpr.TagOffset);
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
if (getDwarfVersion() >= 5)
Asm->emitULEB128(DebugLocs.getBytes(Entry).size());
else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
Asm->emitInt16(DebugLocs.getBytes(Entry).size());
else {
// The entry is too big to fit into 16 bits; drop it, as there is nothing we
// can do.
Asm->emitInt16(0);
return;
}
// Emit the entry.
APByteStreamer Streamer(*Asm);
emitDebugLocEntry(Streamer, Entry, CU);
}
// Emit the header of a DWARF 5 range list table. Returns the symbol
// that designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
const DwarfFile &Holder) {
MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(Holder.getRangeLists().size());
Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
Asm->getDwarfOffsetByteSize());
return TableEnd;
}
// Emit the header of a DWARF 5 locations list table. Returns the symbol that
// designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
const DwarfDebug &DD) {
MCSymbol *TableEnd = mcdwarf::emitListsTableHeaderStart(*Asm->OutStreamer);
const auto &DebugLocs = DD.getDebugLocs();
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(DebugLocs.getLists().size());
Asm->OutStreamer->emitLabel(DebugLocs.getSym());
for (const auto &List : DebugLocs.getLists())
Asm->emitLabelDifference(List.Label, DebugLocs.getSym(),
Asm->getDwarfOffsetByteSize());
return TableEnd;
}
template <typename Ranges, typename PayloadEmitter>
static void emitRangeList(
DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R,
const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair,
unsigned StartxLength, unsigned EndOfList,
StringRef (*StringifyEnum)(unsigned),
bool ShouldUseBaseAddress,
PayloadEmitter EmitPayload) {
auto Size = Asm->MAI->getCodePointerSize();
bool UseDwarf5 = DD.getDwarfVersion() >= 5;
// Emit our symbol so we can find the beginning of the range.
Asm->OutStreamer->emitLabel(Sym);
// Gather all the ranges that apply to the same section so they can share
// a base address entry.
MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges;
for (const auto &Range : R)
SectionRanges[&Range.Begin->getSection()].push_back(&Range);
const MCSymbol *CUBase = CU.getBaseAddress();
bool BaseIsSet = false;
for (const auto &P : SectionRanges) {
auto *Base = CUBase;
if (!Base && ShouldUseBaseAddress) {
const MCSymbol *Begin = P.second.front()->Begin;
const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection());
if (!UseDwarf5) {
Base = NewBase;
BaseIsSet = true;
Asm->OutStreamer->emitIntValue(-1, Size);
Asm->OutStreamer->AddComment(" base address");
Asm->OutStreamer->emitSymbolValue(Base, Size);
} else if (NewBase != Begin || P.second.size() > 1) {
// Only use a base address if
// * the existing pool address doesn't match (NewBase != Begin)
// * or, there's more than one entry to share the base address
Base = NewBase;
BaseIsSet = true;
Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
Asm->emitInt8(BaseAddressx);
Asm->OutStreamer->AddComment(" base address index");
Asm->emitULEB128(DD.getAddressPool().getIndex(Base));
}
} else if (BaseIsSet && !UseDwarf5) {
BaseIsSet = false;
assert(!Base);
Asm->OutStreamer->emitIntValue(-1, Size);
Asm->OutStreamer->emitIntValue(0, Size);
}
for (const auto *RS : P.second) {
const MCSymbol *Begin = RS->Begin;
const MCSymbol *End = RS->End;
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
if (Base) {
if (UseDwarf5) {
// Emit offset_pair when we have a base.
Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
Asm->emitInt8(OffsetPair);
Asm->OutStreamer->AddComment(" starting offset");
Asm->emitLabelDifferenceAsULEB128(Begin, Base);
Asm->OutStreamer->AddComment(" ending offset");
Asm->emitLabelDifferenceAsULEB128(End, Base);
} else {
Asm->emitLabelDifference(Begin, Base, Size);
Asm->emitLabelDifference(End, Base, Size);
}
} else if (UseDwarf5) {
Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
Asm->emitInt8(StartxLength);
Asm->OutStreamer->AddComment(" start index");
Asm->emitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
Asm->emitLabelDifferenceAsULEB128(End, Begin);
} else {
Asm->OutStreamer->emitSymbolValue(Begin, Size);
Asm->OutStreamer->emitSymbolValue(End, Size);
}
EmitPayload(*RS);
}
}
if (UseDwarf5) {
Asm->OutStreamer->AddComment(StringifyEnum(EndOfList));
Asm->emitInt8(EndOfList);
} else {
// Terminate the list with two 0 values.
Asm->OutStreamer->emitIntValue(0, Size);
Asm->OutStreamer->emitIntValue(0, Size);
}
}
// Handles emission of both debug_loclists / debug_loclists.dwo.
static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List),
*List.CU, dwarf::DW_LLE_base_addressx,
dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length,
dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString,
/* ShouldUseBaseAddress */ true,
[&](const DebugLocStream::Entry &E) {
DD.emitDebugLocEntryLocation(E, List.CU);
});
}
void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
Asm->OutStreamer->SwitchSection(Sec);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
TableEnd = emitLoclistsTableHeader(Asm, *this);
for (const auto &List : DebugLocs.getLists())
emitLocList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->emitLabel(TableEnd);
}
// Emit locations into the .debug_loc/.debug_loclists section.
void DwarfDebug::emitDebugLoc() {
emitDebugLocImpl(
getDwarfVersion() >= 5
? Asm->getObjFileLowering().getDwarfLoclistsSection()
: Asm->getObjFileLowering().getDwarfLocSection());
}
// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section.
void DwarfDebug::emitDebugLocDWO() {
if (getDwarfVersion() >= 5) {
emitDebugLocImpl(
Asm->getObjFileLowering().getDwarfLoclistsDWOSection());
return;
}
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->emitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
// (in v5 standard loclists, it currently* /only/ supports base_address +
// offset_pair, so the implementations can't really share much since they
// need to use different representations)
// * as of October 2018, at least
//
// In v5 (see emitLocList), this uses SectionLabels to reuse existing
// addresses in the address pool to minimize object size/relocations.
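//
// Each entry below is therefore: a DW_LLE_startx_length opcode, a ULEB128
// address-pool index for the start address, a fixed 4-byte length, and the
// counted location description emitted by emitDebugLocEntryLocation().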
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->emitULEB128(idx);
// Also the pre-standard encoding is slightly different, emitting this as
// an address-length entry here, but it's a ULEB128 in DWARFv5 loclists.
Asm->emitLabelDifference(Entry.End, Entry.Begin, 4);
emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
}
}
struct ArangeSpan {
const MCSymbol *Start, *End;
};
// Emit a debug aranges section, containing a CU lookup for any
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
// Provides a unique id per text section.
MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
// Filter labels by section.
for (const SymbolCU &SCU : ArangeLabels) {
if (SCU.Sym->isInSection()) {
// Make a note of this symbol and its section.
MCSection *Section = &SCU.Sym->getSection();
if (!Section->getKind().isMetadata())
SectionMap[Section].push_back(SCU);
} else {
// Some symbols (e.g. common/bss on mach-o) can have no section but still
// appear in the output. This is inconvenient, as we rely on sections to
// build arange spans; we can do without them, but it's awkward.
SectionMap[nullptr].push_back(SCU);
}
}
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
for (auto &I : SectionMap) {
MCSection *Section = I.first;
SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 1)
continue;
// If we have no section (e.g. common), just write out
// individual spans for each symbol.
if (!Section) {
for (const SymbolCU &Cur : List) {
ArangeSpan Span;
Span.Start = Cur.Sym;
Span.End = nullptr;
assert(Cur.CU);
Spans[Cur.CU].push_back(Span);
}
continue;
}
// Sort the symbols by offset within the section.
llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
if (IA == 0)
return false;
if (IB == 0)
return true;
return IA < IB;
});
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
// Build spans between each label.
const MCSymbol *StartSym = List[0].Sym;
for (size_t n = 1, e = List.size(); n < e; n++) {
const SymbolCU &Prev = List[n - 1];
const SymbolCU &Cur = List[n];
// Try and build the longest span we can within the same CU.
if (Cur.CU != Prev.CU) {
ArangeSpan Span;
Span.Start = StartSym;
Span.End = Cur.Sym;
assert(Prev.CU);
Spans[Prev.CU].push_back(Span);
StartSym = Cur.Sym;
}
}
}
// Start the dwarf aranges section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
for (const auto &it : Spans) {
DwarfCompileUnit *CU = it.first;
CUs.push_back(CU);
}
// Sort the CU list (again, to ensure consistent output order).
llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
return A->getUniqueID() < B->getUniqueID();
});
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
std::vector<ArangeSpan> &List = Spans[CU];
// Describe the skeleton CU's offset and length, not the dwo file's.
if (auto *Skel = CU->getSkeleton())
CU = Skel;
// Emit size of content not including length itself.
unsigned ContentSize =
sizeof(int16_t) + // DWARF ARange version number
Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info
// section
sizeof(int8_t) + // Pointer Size (in bytes)
sizeof(int8_t); // Segment Size (in bytes)
unsigned TupleSize = PtrSize * 2;
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
unsigned Padding = offsetToAlignment(
Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
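// For example, with 8-byte pointers (TupleSize = 16) and a DWARF32 unit, the
// 4-byte length field plus the 8-byte header ends at offset 12, so 4 bytes of
// 0xff padding bring the first tuple to offset 16.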
// For each compile unit, write the list of spans it covers.
Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set");
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
emitSectionReference(*CU);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(PtrSize);
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
Asm->emitInt8(0);
Asm->OutStreamer->emitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
Asm->emitLabelReference(Span.Start, PtrSize);
// Calculate the size as the distance from the span start to its end.
if (Span.End) {
Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
uint64_t Size = SymSize[Span.Start];
if (Size == 0)
Size = 1;
Asm->OutStreamer->emitIntValue(Size, PtrSize);
}
}
Asm->OutStreamer->AddComment("ARange terminator");
Asm->OutStreamer->emitIntValue(0, PtrSize);
Asm->OutStreamer->emitIntValue(0, PtrSize);
}
}
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU,
dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
llvm::dwarf::RangeListEncodingString,
List.CU->getCUNode()->getRangesBaseAddress() ||
DD.getDwarfVersion() >= 5,
[](auto) {});
}
void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) {
if (Holder.getRangeLists().empty())
return;
assert(useRangesSection());
assert(!CUMap.empty());
assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
return !Pair.second->getCUNode()->isDebugDirectivesOnly();
}));
Asm->OutStreamer->SwitchSection(Section);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
TableEnd = emitRnglistsTableHeader(Asm, Holder);
for (const RangeSpanList &List : Holder.getRangeLists())
emitRangeList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->emitLabel(TableEnd);
}
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
/// .debug_rnglists section.
void DwarfDebug::emitDebugRanges() {
const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
emitDebugRangesImpl(Holder,
getDwarfVersion() >= 5
? Asm->getObjFileLowering().getDwarfRnglistsSection()
: Asm->getObjFileLowering().getDwarfRangesSection());
}
void DwarfDebug::emitDebugRangesDWO() {
emitDebugRangesImpl(InfoHolder,
Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
/// Emit the header of a DWARF 5 macro section, or the GNU extension for
/// DWARF 4.
static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
const DwarfCompileUnit &CU, uint16_t DwarfVersion) {
enum HeaderFlagMask {
#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
};
Asm->OutStreamer->AddComment("Macro information version");
Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4);
// We emit the line-offset flag unconditionally here, since the line offset is
// almost always present.
if (Asm->isDwarf64()) {
Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present");
Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET);
} else {
Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET);
}
Asm->OutStreamer->AddComment("debug_line_offset");
if (DD.useSplitDwarf())
Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(CU.getLineTableStartSym());
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
for (auto *MN : Nodes) {
if (auto *M = dyn_cast<DIMacro>(MN))
emitMacro(*M);
else if (auto *F = dyn_cast<DIMacroFile>(MN))
emitMacroFile(*F, U);
else
llvm_unreachable("Unexpected DI type!");
}
}
void DwarfDebug::emitMacro(DIMacro &M) {
StringRef Name = M.getName();
StringRef Value = M.getValue();
// There should be one space between the macro name and the macro value in
// define entries. In undef entries, only the macro name is emitted.
std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str();
if (UseDebugMacroSection) {
if (getDwarfVersion() >= 5) {
unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
? dwarf::DW_MACRO_define_strx
: dwarf::DW_MACRO_undef_strx;
Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
Asm->emitULEB128(Type);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->emitULEB128(
InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex());
} else {
unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
? dwarf::DW_MACRO_GNU_define_indirect
: dwarf::DW_MACRO_GNU_undef_indirect;
Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type));
Asm->emitULEB128(Type);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->emitDwarfSymbolReference(
InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol());
}
} else {
Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
Asm->emitULEB128(M.getMacinfoType());
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
Asm->OutStreamer->emitBytes(Str);
Asm->emitInt8('\0');
}
}
void DwarfDebug::emitMacroFileImpl(
DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form)) {
Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
Asm->emitULEB128(StartFile);
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(MF.getLine());
Asm->OutStreamer->AddComment("File Number");
DIFile &F = *MF.getFile();
if (useSplitDwarf())
Asm->emitULEB128(getDwoLineTable(U)->getFile(
F.getDirectory(), F.getFilename(), getMD5AsBytes(&F),
Asm->OutContext.getDwarfVersion(), F.getSource()));
else
Asm->emitULEB128(U.getOrCreateSourceID(&F));
handleMacroNodes(MF.getElements(), U);
Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
Asm->emitULEB128(EndFile);
}
void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
// DWARFv5 macro and DWARFv4 macinfo share some common encodings,
// so for readability/uniformity we emit those explicitly.
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
if (UseDebugMacroSection)
emitMacroFileImpl(
F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file,
(getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString);
else
emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
}
void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
DIMacroNodeArray Macros = CUNode->getMacros();
if (Macros.empty())
continue;
Asm->OutStreamer->SwitchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
if (UseDebugMacroSection)
emitMacroHeader(Asm, *this, U, getDwarfVersion());
handleMacroNodes(Macros, U);
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->emitInt8(0);
}
}
/// Emit macros into a debug macinfo/macro section.
void DwarfDebug::emitDebugMacinfo() {
auto &ObjLower = Asm->getObjFileLowering();
emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroSection()
: ObjLower.getDwarfMacinfoSection());
}
void DwarfDebug::emitDebugMacinfoDWO() {
auto &ObjLower = Asm->getObjFileLowering();
emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroDWOSection()
: ObjLower.getDwarfMacinfoDWOSection());
}
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(*NewU, Die);
SkeletonHolder.addUnit(std::move(NewU));
}
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder,
UnitKind::Skeleton);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
NewCU.initStmtList();
if (useSegmentedStringOffsetsTable())
NewCU.addStringOffsetsStart();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
return NewCU;
}
// Emit the .debug_info.dwo section for separated dwarf. This contains the
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't emit relocations into the dwo file.
InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
// abbreviations for the .debug_info.dwo section.
void DwarfDebug::emitDebugAbbrevDWO() {
assert(useSplitDwarf() && "No split dwarf?");
InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
}
void DwarfDebug::emitDebugLineDWO() {
assert(useSplitDwarf() && "No split dwarf?");
SplitTypeUnitFileTable.Emit(
*Asm->OutStreamer, MCDwarfLineTableParams(),
Asm->getObjFileLowering().getDwarfLineDWOSection());
}
void DwarfDebug::emitStringOffsetsTableHeaderDWO() {
assert(useSplitDwarf() && "No split dwarf?");
InfoHolder.getStringPool().emitStringOffsetsTableHeader(
*Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(),
InfoHolder.getStringOffsetsStartSym());
}
// Emit the .debug_str.dwo section for separated dwarf. This contains the
// string section and is identical in format to traditional .debug_str
// sections.
void DwarfDebug::emitDebugStrDWO() {
if (useSegmentedStringOffsetsTable())
emitStringOffsetsTableHeaderDWO();
assert(useSplitDwarf() && "No split dwarf?");
MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
OffSec, /* UseRelativeOffsets = */ false);
}
// Emit address pool.
void DwarfDebug::emitDebugAddr() {
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
}
MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
if (!useSplitDwarf())
return nullptr;
const DICompileUnit *DIUnit = CU.getCUNode();
SplitTypeUnitFileTable.maybeSetRootFile(
DIUnit->getDirectory(), DIUnit->getFilename(),
getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
return &SplitTypeUnitFileTable;
}
uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
// implementation always returns its results in little endian, so we actually
// need the "high" word.
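// (Result.high() gives the second half of the digest, read little-endian,
// which is the value we want here.)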
MD5::MD5Result Result;
Hash.final(Result);
return Result.high();
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
StringRef Identifier, DIE &RefDie,
const DICompositeType *CTy) {
// Fast path: if we're already building some type units and one has used the
// address pool, we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
if (!Ins.second) {
CU.addDIETypeSignature(RefDie, Ins.first->second);
return;
}
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
CU.getLanguage());
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
Ins.first->second = Signature;
if (useSplitDwarf()) {
MCSection *Section =
getDwarfVersion() <= 4
? Asm->getObjFileLowering().getDwarfTypesDWOSection()
: Asm->getObjFileLowering().getDwarfInfoDWOSection();
NewTU.setSection(Section);
} else {
MCSection *Section =
getDwarfVersion() <= 4
? Asm->getObjFileLowering().getDwarfTypesSection(Signature)
: Asm->getObjFileLowering().getDwarfInfoSection(Signature);
NewTU.setSection(Section);
// Non-split type units reuse the compile unit's line table.
CU.applyStmtList(UnitDie);
}
// Add DW_AT_str_offsets_base to the type unit DIE, but not for split type
// units.
if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
NewTU.addStringOffsetsStart();
NewTU.setType(NewTU.createTypeDIE(CTy));
if (TopLevelType) {
auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
TypeUnitsUnderConstruction.clear();
// Types referencing entries in the address table cannot be placed in type
// units.
if (AddrPool.hasBeenUsed()) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
// the type that used an address.
for (const auto &TU : TypeUnitsToAdd)
TypeSignatures.erase(TU.second);
// Construct this type in the CU directly.
// This is inefficient because all the dependent types will be rebuilt
// from scratch, including building them in type units, discovering that
// they depend on addresses, throwing them out and rebuilding them.
CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
return;
}
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
for (auto &TU : TypeUnitsToAdd) {
InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
}
}
CU.addDIETypeSignature(RefDie, Signature);
}
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
DD->AddrPool.resetUsedFlag();
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
DD->AddrPool.resetUsedFlag(AddrPoolUsed);
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
return NonTypeUnitContext(this);
}
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
// accelerator tables are disabled, this function does nothing.
template <typename DataT>
void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
if (getAccelTableKind() == AccelTableKind::None)
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name);
switch (getAccelTableKind()) {
case AccelTableKind::Apple:
AppleAccel.addName(Ref, Die);
break;
case AccelTableKind::Dwarf:
AccelDebugNames.addName(Ref, Die);
break;
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
case AccelTableKind::None:
llvm_unreachable("None handled above");
}
}
void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
addAccelNameImpl(CU, AccelNames, Name, Die);
}
void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
// ObjC names go only into the Apple accelerator tables.
if (getAccelTableKind() == AccelTableKind::Apple)
addAccelNameImpl(CU, AccelObjC, Name, Die);
}
void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name,
const DIE &Die) {
addAccelNameImpl(CU, AccelNamespace, Name, Die);
}
void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name,
const DIE &Die, char Flags) {
addAccelNameImpl(CU, AccelTypes, Name, Die);
}
uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
if (Asm->getDwarfVersion() >= 4)
return dwarf::Form::DW_FORM_sec_offset;
assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) &&
"DWARF64 is not defined prior DWARFv3");
return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8
: dwarf::Form::DW_FORM_data4;
}
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
auto I = SectionLabels.find(S);
if (I == SectionLabels.end())
return nullptr;
return I->second;
}
void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
if (useSplitDwarf() || getDwarfVersion() >= 5)
AddrPool.getIndex(S);
}
Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
assert(File);
if (getDwarfVersion() < 5)
return None;
Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
return None;
// Convert the string checksum to an MD5Result for the streamer.
// The verifier validates the checksum so we assume it's okay.
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
MD5::MD5Result CKMem;
std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
return CKMem;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6356a65b50d3..b55be799b6bc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,858 +1,855 @@
//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing dwarf debug info into asm files.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
#include "DebugLocStream.h"
#include "DebugLocEntry.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
namespace llvm {
class AsmPrinter;
class ByteStreamer;
class DIE;
class DwarfCompileUnit;
class DwarfExpression;
class DwarfTypeUnit;
class DwarfUnit;
class LexicalScope;
class MachineFunction;
class MCSection;
class MCSymbol;
class Module;
//===----------------------------------------------------------------------===//
/// This class is defined as the common parent of DbgVariable and DbgLabel
/// so that it can leverage polymorphism to share common code between
/// DbgVariable and DbgLabel.
class DbgEntity {
const DINode *Entity;
const DILocation *InlinedAt;
DIE *TheDIE = nullptr;
unsigned SubclassID;
public:
enum DbgEntityKind {
DbgVariableKind,
DbgLabelKind
};
DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
: Entity(N), InlinedAt(IA), SubclassID(ID) {}
virtual ~DbgEntity() {}
/// Accessors.
/// @{
const DINode *getEntity() const { return Entity; }
const DILocation *getInlinedAt() const { return InlinedAt; }
DIE *getDIE() const { return TheDIE; }
unsigned getDbgEntityID() const { return SubclassID; }
/// @}
void setDIE(DIE &D) { TheDIE = &D; }
static bool classof(const DbgEntity *N) {
switch (N->getDbgEntityID()) {
default:
return false;
case DbgVariableKind:
case DbgLabelKind:
return true;
}
}
};
//===----------------------------------------------------------------------===//
/// This class is used to track local variable information.
///
/// Variables can be created from allocas, in which case they're generated from
/// the MMI table. Such variables can have multiple expressions and frame
/// indices.
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
/// Index of the entry list in DebugLocs.
unsigned DebugLocListIndex = ~0u;
/// DW_OP_LLVM_tag_offset value from DebugLocs.
Optional<uint8_t> DebugLocListTagOffset;
/// Single value location description.
std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
struct FrameIndexExpr {
int FI;
const DIExpression *Expr;
};
mutable SmallVector<FrameIndexExpr, 1>
FrameIndexExprs; /// Frame index + expression.
public:
/// Construct a DbgVariable.
///
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
DbgVariable(const DILocalVariable *V, const DILocation *IA)
: DbgEntity(V, IA, DbgVariableKind) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc.get() && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
// Initialize variable's location.
void initializeDbgValue(DbgValueLoc Value) {
assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!ValueLoc && "Already initialized?");
assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
ValueLoc = std::make_unique<DbgValueLoc>(Value);
if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
/// Initialize from a DBG_VALUE instruction.
void initializeDbgValue(const MachineInstr *DbgValue);
// Accessors.
const DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getEntity());
}
const DIExpression *getSingleExpression() const {
assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
StringRef getName() const { return getVariable()->getName(); }
const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
void addMMIEntry(const DbgVariable &V);
// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
// FIXME: Why don't we just infer this tag and store it all along?
if (getVariable()->isParameter())
return dwarf::DW_TAG_formal_parameter;
return dwarf::DW_TAG_variable;
}
/// Return true if DbgVariable is artificial.
bool isArtificial() const {
if (getVariable()->isArtificial())
return true;
if (getType()->isArtificial())
return true;
return false;
}
bool isObjectPointer() const {
if (getVariable()->isObjectPointer())
return true;
if (getType()->isObjectPointer())
return true;
return false;
}
bool hasComplexAddress() const {
assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
assert((FrameIndexExprs.empty() ||
(FrameIndexExprs.size() == 1 &&
FrameIndexExprs[0].Expr->getNumElements())) &&
"Invalid Expr for DBG_VALUE");
return !FrameIndexExprs.empty();
}
const DIType *getType() const;
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgVariableKind;
}
};
//===----------------------------------------------------------------------===//
/// This class is used to track label information.
///
/// Labels are collected from \c DBG_LABEL instructions.
class DbgLabel : public DbgEntity {
const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction.
public:
/// We need MCSymbol information to generate DW_AT_low_pc.
DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr)
: DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {}
/// Accessors.
/// @{
const DILabel *getLabel() const { return cast<DILabel>(getEntity()); }
const MCSymbol *getSymbol() const { return Sym; }
StringRef getName() const { return getLabel()->getName(); }
/// @}
/// Translate tag to proper Dwarf tag.
dwarf::Tag getTag() const {
return dwarf::DW_TAG_label;
}
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgLabelKind;
}
};
/// Used for tracking debug info about call site parameters.
class DbgCallSiteParam {
private:
unsigned Register; ///< Parameter register at the callee entry point.
DbgValueLoc Value; ///< Corresponding location for the parameter value at
///< the call site.
public:
DbgCallSiteParam(unsigned Reg, DbgValueLoc Val)
: Register(Reg), Value(Val) {
assert(Reg && "Parameter register cannot be undef");
}
unsigned getRegister() const { return Register; }
DbgValueLoc getValue() const { return Value; }
};
/// Collection used for storing debug call site parameters.
using ParamSet = SmallVector<DbgCallSiteParam, 4>;
/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
const MCSymbol *Sym;
DwarfCompileUnit *CU;
};
/// The kind of accelerator tables we should emit.
enum class AccelTableKind {
Default, ///< Platform default.
None, ///< None.
Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
Dwarf, ///< DWARF v5 .debug_names.
};
/// Collects and handles dwarf debug information.
class DwarfDebug : public DebugHandlerBase {
/// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
/// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
/// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
/// List of all labels used in aranges generation.
std::vector<SymbolCU> ArangeLabels;
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
/// Collection of concrete variables/labels.
SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
/// can refer to them in spite of insertions into this list.
DebugLocStream DebugLocs;
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
SmallPtrSet<const DISubprogram *, 16>>
ProcessedSPNodes;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn = nullptr;
/// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU = nullptr;
/// As an optimization, there is no need to emit an entry in the directory
/// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
/// Holder for the file specific debug information.
DwarfFile InfoHolder;
/// Holders for the various debug information flags that we might need to
/// have exposed. See accessor functions below for description.
/// Map from MDNodes for user-defined types to their type signatures. Also
/// used to keep track of which types we have emitted type units for.
DenseMap<const MDNode *, uint64_t> TypeSignatures;
DenseMap<const MCSection *, const MCSymbol *> SectionLabels;
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
/// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format).
bool UseDWARF2Bitfields;
/// Whether to emit all linkage names, or just abstract subprograms.
bool UseAllLinkageNames;
/// Use inlined strings.
bool UseInlineStrings = false;
/// Allow emission of .debug_ranges section.
bool UseRangesSection = true;
/// True if the sections themselves must be used as references and no
/// temporary symbols should be created inside DWARF sections.
bool UseSectionsAsReferences = false;
/// Allow emission of the .debug_loc section.
bool UseLocSection = true;
/// Generate DWARF v4 type units.
bool GenerateTypeUnits;
/// Emit a .debug_macro section instead of .debug_macinfo.
bool UseDebugMacroSection;
/// Avoid using DW_OP_convert due to consumer incompatibilities.
bool EnableOpConvert;
public:
enum class MinimizeAddrInV5 {
Default,
Disabled,
Ranges,
Expressions,
Form,
};
private:
/// Force the use of DW_AT_ranges even for single-entry range lists.
MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
/// DWARF5 Experimental Options
/// @{
AccelTableKind TheAccelTableKind;
bool HasAppleExtensionAttributes;
bool HasSplitDwarf;
/// Whether to generate the DWARF v5 string offsets table.
/// It consists of a series of contributions, each preceded by a header.
/// The pre-DWARF v5 string offsets table for split dwarf is, in contrast,
/// a monolithic sequence of string offsets.
bool UseSegmentedStringOffsetsTable;
/// Enable production of call site parameters needed to print the debug entry
/// values. Useful for testing purposes when a debugger does not support the
/// feature yet.
bool EmitDebugEntryValues;
/// Separated Dwarf Variables
/// In general these will all be for bits that are left in the
/// original object file, rather than things that are meant
/// to be in the .dwo sections.
/// Holder for the skeleton information.
DwarfFile SkeletonHolder;
/// Store file names for type units under fission in a line table
/// header that will be emitted into debug_line.dwo.
// FIXME: replace this with a map from comp_dir to table so that we
// can emit multiple tables during LTO each of which uses directory
// 0, referencing the comp_dir of all the type units that use it.
MCDwarfDwoLineTable SplitTypeUnitFileTable;
/// @}
/// True iff there is only a single CU in this module.
bool SingleCU;
bool IsDarwin;
/// Map for tracking Fortran deferred CHARACTER lengths.
DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
AddressPool AddrPool;
/// Accelerator tables.
AccelTable<DWARF5AccelTableData> AccelDebugNames;
AccelTable<AppleAccelTableOffsetData> AccelNames;
AccelTable<AppleAccelTableOffsetData> AccelObjC;
AccelTable<AppleAccelTableOffsetData> AccelNamespace;
AccelTable<AppleAccelTableTypeData> AccelTypes;
/// Identify a debugger for "tuning" the debug info.
///
/// The "tuning" should be used to set defaults for individual feature flags
/// in DwarfDebug; if a given feature has a more specific command-line option,
/// that option should take precedence over the tuning.
DebuggerKind DebuggerTuning = DebuggerKind::Default;
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
return InfoHolder.getUnits();
}
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU,
LexicalScope &Scope,
const DINode *Node,
const DILocation *Location,
const MCSymbol *Sym = nullptr);
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
- /// Construct a DIE for the subprogram definition \p SP and return it.
- DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
template <typename DataT>
void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel,
StringRef Name, const DIE &Die);
void finishEntityDefinitions();
void finishSubprogramDefinitions();
/// Finish off debug information after all functions have been
/// processed.
void finalizeModuleInfo();
/// Emit the debug info section.
void emitDebugInfo();
/// Emit the abbreviation section.
void emitAbbreviations();
/// Emit the string offsets table header.
void emitStringOffsetsTableHeader();
/// Emit a specified accelerator table.
template <typename AccelTableT>
void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName);
/// Emit DWARF v5 accelerator table.
void emitAccelDebugNames();
/// Emit visible names into a hashed accelerator table section.
void emitAccelNames();
/// Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
/// Emit namespace dies into a hashed accelerator table.
void emitAccelNamespaces();
/// Emit type dies into a hashed accelerator table.
void emitAccelTypes();
/// Emit visible names and types into debug pubnames and pubtypes sections.
void emitDebugPubSections();
void emitDebugPubSection(bool GnuStyle, StringRef Name,
DwarfCompileUnit *TheU,
const StringMap<const DIE *> &Globals);
/// Emit null-terminated strings into a debug str section.
void emitDebugStr();
/// Emit variable locations into a debug loc section.
void emitDebugLoc();
/// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
void emitDebugLocImpl(MCSection *Sec);
/// Emit address ranges into a debug aranges section.
void emitDebugARanges();
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
void emitDebugRangesDWO();
void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section);
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
/// Emit macros into a debug macinfo.dwo section.
void emitDebugMacinfoDWO();
void emitDebugMacinfoImpl(MCSection *Section);
void emitMacro(DIMacro &M);
void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
void emitMacroFileImpl(DIMacroFile &F, DwarfCompileUnit &U,
unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form));
void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU);
/// Construct the split debug info compile unit for the debug info section.
/// In DWARF v5, the skeleton unit DIE may have the following attributes:
/// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc,
/// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base.
/// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name
/// is used instead of DW_AT_dwo_name, DW_AT_GNU_addr_base instead of
/// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base.
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
/// Emit the debug info dwo section.
void emitDebugInfoDWO();
/// Emit the debug abbrev dwo section.
void emitDebugAbbrevDWO();
/// Emit the debug line dwo section.
void emitDebugLineDWO();
/// Emit the dwo stringoffsets table header.
void emitStringOffsetsTableHeaderDWO();
/// Emit the debug str dwo section.
void emitDebugStrDWO();
/// Emit DWO addresses.
void emitDebugAddr();
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
void finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity *N);
/// Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
/// Populate LexicalScope entries with variables' info.
void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
/// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
DenseSet<InlinedEntity> &P);
/// Emit the reference to the section.
void emitSectionReference(const DwarfCompileUnit &CU);
protected:
/// Gather pre-function debug information.
void beginFunctionImpl(const MachineFunction *MF) override;
/// Gather and emit post-function debug information.
void endFunctionImpl(const MachineFunction *MF) override;
void skippedNonDebugFunction() override;
public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
DwarfDebug(AsmPrinter *A);
~DwarfDebug() override;
/// Emit all Dwarf sections that should come prior to the
/// content.
void beginModule(Module *M) override;
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
/// Emits the initial debug location directive.
DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
/// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
static uint64_t makeTypeSignature(StringRef Identifier);
/// Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
class NonTypeUnitContext {
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
bool AddrPoolUsed;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
NonTypeUnitContext(NonTypeUnitContext&&) = default;
~NonTypeUnitContext();
};
NonTypeUnitContext enterNonTypeUnitContext();
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
/// For symbols that have a size designated (e.g. common symbols),
/// this tracks that size.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
SymSize[Sym] = Size;
}
/// Returns whether we should emit all DW_AT_[MIPS_]linkage_name.
/// If not, we still might emit linkage names in certain cases.
bool useAllLinkageNames() const { return UseAllLinkageNames; }
/// Returns whether to use DW_OP_GNU_push_tls_address instead of the standard
/// DW_OP_form_tls_address opcode.
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
/// Returns whether to use the DWARF2 format for bitfields instead of the
/// DWARF4 format.
bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
/// Returns whether to use inline strings.
bool useInlineStrings() const { return UseInlineStrings; }
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
/// Returns whether range encodings should be used for single entry range
/// lists.
bool alwaysUseRanges() const {
return MinimizeAddr == MinimizeAddrInV5::Ranges;
}
// Returns whether novel exprloc addrx+offset encodings should be used to
// reduce debug_addr size.
bool useAddrOffsetExpressions() const {
return MinimizeAddr == MinimizeAddrInV5::Expressions;
}
// Returns whether addrx+offset LLVM extension form should be used to reduce
// debug_addr size.
bool useAddrOffsetForm() const {
return MinimizeAddr == MinimizeAddrInV5::Form;
}
/// Returns whether to use sections as labels rather than temp symbols.
bool useSectionsAsReferences() const {
return UseSectionsAsReferences;
}
/// Returns whether .debug_loc section should be emitted.
bool useLocSection() const { return UseLocSection; }
/// Returns whether to generate DWARF v4 type units.
bool generateTypeUnits() const { return GenerateTypeUnits; }
// Experimental DWARF5 features.
/// Returns what kind (if any) of accelerator tables to emit.
AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }
bool useAppleExtensionAttributes() const {
return HasAppleExtensionAttributes;
}
/// Returns whether or not to change the current debug info for the
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
/// Returns whether to generate a string offsets table with (possibly shared)
/// contributions from each CU and type unit. This implies the use of
/// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that
/// DW_FORM_GNU_str_index is also an indirect reference, but it is used with
/// a pre-DWARF v5 implementation of split DWARF sections, which uses a
/// monolithic string offsets table.
bool useSegmentedStringOffsetsTable() const {
return UseSegmentedStringOffsetsTable;
}
bool emitDebugEntryValues() const {
return EmitDebugEntryValues;
}
bool useOpConvert() const {
return EnableOpConvert;
}
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
/// Returns a suitable DWARF form to represent a section offset, i.e.
/// * DW_FORM_sec_offset for DWARF version >= 4;
/// * DW_FORM_data8 for 64-bit DWARFv3;
/// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2.
dwarf::Form getDwarfSectionOffsetForm() const;
/// Returns the previous CU that was being updated.
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
/// Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU);
/// Emit the location for a debug loc entry, including the size header.
void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
const DwarfCompileUnit *CU);
void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die);
void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die);
void addAccelNamespace(const DICompileUnit &CU, StringRef Name,
const DIE &Die);
void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die,
char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
/// Find the matching DwarfCompileUnit for the given CU DIE.
DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); }
const DwarfCompileUnit *lookupCU(const DIE *Die) const {
return CUDieMap.lookup(Die);
}
unsigned getStringTypeLoc(const DIStringType *ST) const {
return StringTypeLocMap.lookup(ST);
}
void addStringTypeLoc(const DIStringType *ST, unsigned Loc) {
assert(ST);
if (Loc)
StringTypeLocMap[ST] = Loc;
}
/// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
///
/// Returns whether we are "tuning" for a given debugger.
/// @{
bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
bool tuneForDBX() const { return DebuggerTuning == DebuggerKind::DBX; }
/// @}
const MCSymbol *getSectionLabel(const MCSection *S);
void insertSectionLabel(const MCSymbol *S);
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr);
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
};
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 344d30fad347..9d7b3d6e1891 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,1817 +1,1818 @@
//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
#include "AddressPool.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
DwarfCompileUnit &CU, DIELoc &DIE)
: DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value);
}
void DIEDwarfExpression::emitData1(uint8_t Value) {
CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value);
}
void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
CU.addBaseTypeRef(getActiveDIE(), Idx);
}
void DIEDwarfExpression::enableTemporaryBuffer() {
assert(!IsBuffering && "Already buffering?");
IsBuffering = true;
}
void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
unsigned DIEDwarfExpression::getTemporaryBufferSize() {
return TmpDIE.ComputeSize(&AP);
}
void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
llvm::Register MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
}
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
: DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
IndexTyDie(nullptr) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
: DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
SplitLineTable(SplitLineTable) {
}
DwarfUnit::~DwarfUnit() {
for (DIEBlock *B : DIEBlocks)
B->~DIEBlock();
for (DIELoc *L : DIELocs)
L->~DIELoc();
}
int64_t DwarfUnit::getDefaultLowerBound() const {
switch (getLanguage()) {
default:
break;
// The languages below have valid values in all DWARF versions.
case dwarf::DW_LANG_C:
case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C_plus_plus:
return 0;
case dwarf::DW_LANG_Fortran77:
case dwarf::DW_LANG_Fortran90:
return 1;
// The languages below have valid values only if the DWARF version >= 3.
case dwarf::DW_LANG_C99:
case dwarf::DW_LANG_ObjC:
case dwarf::DW_LANG_ObjC_plus_plus:
if (DD->getDwarfVersion() >= 3)
return 0;
break;
case dwarf::DW_LANG_Fortran95:
if (DD->getDwarfVersion() >= 3)
return 1;
break;
// Starting with DWARF v4, all defined languages have valid values.
case dwarf::DW_LANG_D:
case dwarf::DW_LANG_Java:
case dwarf::DW_LANG_Python:
case dwarf::DW_LANG_UPC:
if (DD->getDwarfVersion() >= 4)
return 0;
break;
case dwarf::DW_LANG_Ada83:
case dwarf::DW_LANG_Ada95:
case dwarf::DW_LANG_Cobol74:
case dwarf::DW_LANG_Cobol85:
case dwarf::DW_LANG_Modula2:
case dwarf::DW_LANG_Pascal83:
case dwarf::DW_LANG_PLI:
if (DD->getDwarfVersion() >= 4)
return 1;
break;
// The languages below are new in DWARF v5.
case dwarf::DW_LANG_BLISS:
case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_C_plus_plus_03:
case dwarf::DW_LANG_C_plus_plus_11:
case dwarf::DW_LANG_C_plus_plus_14:
case dwarf::DW_LANG_Dylan:
case dwarf::DW_LANG_Go:
case dwarf::DW_LANG_Haskell:
case dwarf::DW_LANG_OCaml:
case dwarf::DW_LANG_OpenCL:
case dwarf::DW_LANG_RenderScript:
case dwarf::DW_LANG_Rust:
case dwarf::DW_LANG_Swift:
if (DD->getDwarfVersion() >= 5)
return 0;
break;
case dwarf::DW_LANG_Fortran03:
case dwarf::DW_LANG_Fortran08:
case dwarf::DW_LANG_Julia:
case dwarf::DW_LANG_Modula3:
if (DD->getDwarfVersion() >= 5)
return 1;
break;
}
return -1;
}
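// Illustrative sketch (not part of this patch): one plausible way a caller
// might consume getDefaultLowerBound(). The assumption here is that an
// explicit DW_AT_lower_bound is only worth emitting when it differs from the
// language default; the actual policy lives in the subrange construction code
// and may differ in detail.
static bool shouldEmitLowerBound(int64_t Bound, int64_t DefaultLowerBound) {
  // -1 means "no known default for this language/version", so always emit.
  if (DefaultLowerBound == -1)
    return true;
  return Bound != DefaultLowerBound;
}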
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system (this includes subprogram
- // declarations *and* subprogram definitions, even local definitions), the
- // DIE must be shared across CUs.
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
}
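// Illustrative sketch (not part of this patch): the new sharing rule above,
// restated over plain booleans to make the change easy to see. Before this
// patch, subprogram definitions were shareable too; now only types and
// subprogram *declarations* are. The dwo-unit / shareAcrossDWOCUs() check is
// left out for brevity, since it is unchanged.
static bool shareableAcrossCUs(bool IsType, bool IsSubprogram,
                               bool IsDefinition, bool GeneratingTypeUnits) {
  if (GeneratingTypeUnits)
    return false;
  return IsType || (IsSubprogram && !IsDefinition);
}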
DIE *DwarfUnit::getDIE(const DINode *D) const {
if (isShareableAcrossCUs(D))
return DU->getDIE(D);
return MDNodeToDieMap.lookup(D);
}
void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
if (isShareableAcrossCUs(Desc)) {
DU->insertDIE(Desc, D);
return;
}
MDNodeToDieMap.insert(std::make_pair(Desc, D));
}
void DwarfUnit::insertDIE(DIE *D) {
MDNodeToDieMap.insert(std::make_pair(nullptr, D));
}
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
addAttribute(Die, Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
}
void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
assert(Form != dwarf::DW_FORM_implicit_const &&
"DW_FORM_implicit_const is used only for signed integers");
addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
addAttribute(Die, Attribute, *Form, DIEInteger(Integer));
}
void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
int64_t Integer) {
addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
if (CUNode->isDebugDirectivesOnly())
return;
if (DD->useInlineStrings()) {
addAttribute(Die, Attribute, dwarf::DW_FORM_string,
new (DIEValueAllocator)
DIEInlineString(String, DIEValueAllocator));
return;
}
dwarf::Form IxForm =
isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
auto StringPoolEntry =
useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index
? DU->getStringPool().getIndexedEntry(*Asm, String)
: DU->getStringPool().getEntry(*Asm, String);
// For DWARF v5 and beyond, use the smallest strx? form possible.
if (useSegmentedStringOffsetsTable()) {
IxForm = dwarf::DW_FORM_strx1;
unsigned Index = StringPoolEntry.getIndex();
if (Index > 0xffffff)
IxForm = dwarf::DW_FORM_strx4;
else if (Index > 0xffff)
IxForm = dwarf::DW_FORM_strx3;
else if (Index > 0xff)
IxForm = dwarf::DW_FORM_strx2;
}
addAttribute(Die, Attribute, IxForm, DIEString(StringPoolEntry));
}
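// Illustrative sketch (not part of this patch): the strx form selection above,
// as a standalone helper. The index is a slot in the string offsets table, so
// the smallest form whose unsigned range still covers the index is chosen:
// one byte up to 0xff, two up to 0xffff, three up to 0xffffff, four otherwise.
#include <cstdint>
static const char *smallestStrxForm(uint32_t Index) {
  if (Index > 0xffffff)
    return "DW_FORM_strx4";
  if (Index > 0xffff)
    return "DW_FORM_strx3";
  if (Index > 0xff)
    return "DW_FORM_strx2";
  return "DW_FORM_strx1";
}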
void DwarfUnit::addLabel(DIEValueList &Die, dwarf::Attribute Attribute,
dwarf::Form Form, const MCSymbol *Label) {
addAttribute(Die, Attribute, Form, DIELabel(Label));
}
void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
}
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
if (!SplitLineTable)
return getCU().getOrCreateSourceID(File);
if (!UsedLineTable) {
UsedLineTable = true;
// This is a split type unit that needs a line table.
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
return SplitLineTable->getFile(
File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
Asm->OutContext.getDwarfVersion(), File->getSource());
}
void DwarfUnit::addPoolOpAddress(DIEValueList &Die, const MCSymbol *Label) {
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
const MCSymbol *Base = nullptr;
if (Label->isInSection() && UseAddrOffsetFormOrExpressions)
Base = DD->getSectionLabel(&Label->getSection());
uint32_t Index = DD->getAddressPool().getIndex(Base ? Base : Label);
if (DD->getDwarfVersion() >= 5) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx);
addUInt(Die, dwarf::DW_FORM_addrx, Index);
} else {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
addUInt(Die, dwarf::DW_FORM_GNU_addr_index, Index);
}
if (Base && Base != Label) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_const4u);
addLabelDelta(Die, (dwarf::Attribute)0, Label, Base);
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
}
}
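// Illustrative sketch (not part of this patch): the shape of the expression
// addPoolOpAddress() emits, written out as text. Index is the address-pool
// slot of the base section label (or of the label itself when no base is
// used); when a base is used, the label's offset from it is folded in with
// DW_OP_const4u / DW_OP_plus.
#include <string>
static std::string describePoolAddress(bool DwarfV5OrLater, unsigned Index,
                                       bool HasBaseOffset) {
  std::string Ops = DwarfV5OrLater ? "DW_OP_addrx " : "DW_OP_GNU_addr_index ";
  Ops += std::to_string(Index);
  if (HasBaseOffset)
    Ops += ", DW_OP_const4u (Label - Base), DW_OP_plus";
  return Ops;
}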
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
if (DD->getDwarfVersion() >= 5) {
addPoolOpAddress(Die, Sym);
return;
}
if (DD->useSplitDwarf()) {
addPoolOpAddress(Die, Sym);
return;
}
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Die, dwarf::DW_FORM_addr, Sym);
}
void DwarfUnit::addLabelDelta(DIEValueList &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
addAttribute(Die, Attribute, dwarf::DW_FORM_data4,
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
addDIEEntry(Die, Attribute, DIEEntry(Entry));
}
void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
// Flag the type unit reference as a declaration so that if it contains
// members (implicit special members, static data member definitions, member
// declarations for definitions in this CU, etc) consumers don't get confused
// and think this is a full definition.
addFlag(Die, dwarf::DW_AT_declaration);
addAttribute(Die, dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
DIEInteger(Signature));
}
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIEUnit *CU = Die.getUnit();
const DIEUnit *EntryCU = Entry.getEntry().getUnit();
if (!CU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
CU = getUnitDie().getUnit();
if (!EntryCU)
EntryCU = getUnitDie().getUnit();
addAttribute(Die, Attribute,
EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
}
DIE &DwarfUnit::createAndAddDIE(dwarf::Tag Tag, DIE &Parent, const DINode *N) {
DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, Tag));
if (N)
insertDIE(N, &Die);
return Die;
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
Loc->ComputeSize(Asm);
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
DIEBlock *Block) {
Block->ComputeSize(Asm);
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
addAttribute(Die, Attribute, Form, Block);
}
void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
DIEBlock *Block) {
addBlock(Die, Attribute, Block->BestForm(), Block);
}
void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
if (Line == 0)
return;
unsigned FileID = getOrCreateSourceID(File);
addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
assert(V);
addSourceLine(Die, V->getLine(), V->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
assert(G);
addSourceLine(Die, G->getLine(), G->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
assert(SP);
addSourceLine(Die, SP->getLine(), SP->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) {
assert(L);
addSourceLine(Die, L->getLine(), L->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
assert(Ty);
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
assert(Ty);
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
}
void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
const DIType *Ty) {
addConstantValue(Die, CI->getValue(), Ty);
}
void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
addConstantValue(Die, DD->isUnsignedDIType(Ty), Val);
}
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
// FIXME: This is a bit conservative/simple - it always emits negative values
// sign-extended to 64 bits rather than minimizing the number of bytes.
addUInt(Die, dwarf::DW_AT_const_value,
Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
addConstantValue(Die, Val, DD->isUnsignedDIType(Ty));
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
addConstantValue(Die, Unsigned,
Unsigned ? Val.getZExtValue() : Val.getSExtValue());
return;
}
DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
// Get the raw data form of the large APInt.
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getDataLayout().isLittleEndian();
// Output the constant to DWARF one byte at a time.
for (int i = 0; i < NumBytes; i++) {
uint8_t c;
if (LittleEndian)
c = Ptr64[i / 8] >> (8 * (i & 7));
else
c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
addUInt(*Block, dwarf::DW_FORM_data1, c);
}
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
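// Illustrative sketch (not part of this patch): the byte-extraction loop
// above, applied to a value held as an array of 64-bit words (least
// significant word first, as APInt::getRawData() returns). For a
// little-endian target the bytes stream out least significant first; for a
// big-endian target the indexing is mirrored.
#include <cstdint>
#include <vector>
static std::vector<uint8_t> rawBytes(const uint64_t *Words, int NumBytes,
                                     bool LittleEndian) {
  std::vector<uint8_t> Out;
  for (int I = 0; I < NumBytes; ++I) {
    int Idx = LittleEndian ? I : NumBytes - 1 - I;
    Out.push_back(uint8_t(Words[Idx / 8] >> (8 * (Idx & 7))));
  }
  return Out;
}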
void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
if (!LinkageName.empty())
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::dropLLVMManglingEscape(LinkageName));
}
void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
// Add template parameters.
for (const auto *Element : TParams) {
if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element))
constructTemplateTypeParameterDIE(Buffer, TTP);
else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element))
constructTemplateValueParameterDIE(Buffer, TVP);
}
}
/// Add thrown types.
void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
for (const auto *Ty : ThrownTypes) {
DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die);
addType(TT, cast<DIType>(Ty));
}
}
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (!Context || isa<DIFile>(Context))
return &getUnitDie();
if (auto *T = dyn_cast<DIType>(Context))
return getOrCreateTypeDIE(T);
if (auto *NS = dyn_cast<DINamespace>(Context))
return getOrCreateNameSpace(NS);
if (auto *SP = dyn_cast<DISubprogram>(Context))
return getOrCreateSubprogramDIE(SP);
if (auto *M = dyn_cast<DIModule>(Context))
return getOrCreateModule(M);
return getDIE(Context);
}
DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
const DIType *Ty) {
// Create new type.
DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
if (auto *BT = dyn_cast<DIBasicType>(Ty))
constructTypeDIE(TyDIE, BT);
else if (auto *ST = dyn_cast<DIStringType>(Ty))
constructTypeDIE(TyDIE, ST);
else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
constructTypeDIE(TyDIE, STy);
else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
(Ty->getRawName() || CTy->getRawIdentifier())) {
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
else {
auto X = DD->enterNonTypeUnitContext();
finishNonUnitTypeDIE(TyDIE, CTy);
}
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
} else {
constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
}
return &TyDIE;
}
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
auto *Ty = cast<DIType>(TyNode);
// DW_TAG_restrict_type is not supported in DWARF2
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// DW_TAG_atomic_type is not supported in DWARF < 5
if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
return static_cast<DwarfUnit *>(ContextDIE->getUnit())
->createTypeDIE(Context, *ContextDIE, Ty);
}
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
bool IsImplementation = false;
if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++, and any non-negative
// value is some version of Objective-C/C++.
IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
dwarf::Attribute Attribute) {
assert(Ty && "Trying to add a type that doesn't exist?");
addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
}
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
if (!Context)
return "";
// FIXME: Decide whether to implement this for non-C++ languages.
if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
return "";
std::string CS;
SmallVector<const DIScope *, 1> Parents;
while (!isa<DICompileUnit>(Context)) {
Parents.push_back(Context);
if (const DIScope *S = Context->getScope())
Context = S;
else
// Structure, etc. types will have a NULL context if they're at the top
// level.
break;
}
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
CS += "::";
}
}
return CS;
}
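// Illustrative sketch (not part of this patch): the same scope-prefix
// construction over a plain list of scope names, innermost first (as the walk
// above collects them before reverse-iterating). Unnamed scopes contribute
// nothing in this simplified version; the real code substitutes
// "(anonymous namespace)" for unnamed namespaces.
#include <string>
#include <vector>
static std::string scopePrefix(const std::vector<std::string> &InnermostFirst) {
  std::string CS;
  for (auto It = InnermostFirst.rbegin(); It != InnermostFirst.rend(); ++It)
    if (!It->empty())
      CS += *It + "::";
  return CS;
}
// e.g. scopePrefix({"Inner", "Outer", "ns"}) == "ns::Outer::Inner::"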
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
// Get core information.
StringRef Name = BTy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// An unspecified type only has a name attribute.
if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
if (BTy->getTag() != dwarf::DW_TAG_string_type)
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy->getEncoding());
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (BTy->isBigEndian())
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big);
else if (BTy->isLittleEndian())
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
// Get core information.
StringRef Name = STy->getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
if (DIVariable *Var = STy->getStringLength()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE);
} else if (DIExpression *Expr = STy->getStringLengthExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
// This is to describe the memory location of the
// length of a Fortran deferred length string, so
// lock it down as such.
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
} else {
uint64_t Size = STy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
if (STy->getEncoding()) {
// For eventual Unicode support.
addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
STy->getEncoding());
}
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Get core information.
StringRef Name = DTy->getName();
uint64_t Size = DTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
const DIType *FromTy = DTy->getBaseType();
if (FromTy)
addType(Buffer, FromTy);
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
// If alignment is specified for a typedef, create and insert a DW_AT_alignment
// attribute in the DW_TAG_typedef DIE.
if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
uint32_t AlignInBytes = DTy->getAlignInBytes();
if (AlignInBytes > 0)
addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
&& Tag != dwarf::DW_TAG_ptr_to_member_type
&& Tag != dwarf::DW_TAG_reference_type
&& Tag != dwarf::DW_TAG_rvalue_reference_type)
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
// Add source line info if available and the type is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
// If DWARF address space value is other than None, add it. The IR
// verifier checks that DWARF address space only exists for pointer
// or reference types.
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
DTy->getDWARFAddressSpace().getValue());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
for (unsigned i = 1, N = Args.size(); i < N; ++i) {
const DIType *Ty = Args[i];
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, Ty);
if (Ty->isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
}
}
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
// Add return type. A void return won't have a type.
auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
if (Elements.size())
if (auto RTy = Elements[0])
addType(Buffer, RTy);
bool isPrototyped = true;
if (Elements.size() == 2 && !Elements[1])
isPrototyped = false;
constructSubprogramArguments(Buffer, Elements);
// Add prototype flag if we're dealing with a C language and the function has
// been prototyped.
uint16_t Language = getLanguage();
if (isPrototyped &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(Buffer, dwarf::DW_AT_prototyped);
// Add a DW_AT_calling_convention if this has an explicit convention.
if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal)
addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
CTy->getCC());
if (CTy->isLValueReference())
addFlag(Buffer, dwarf::DW_AT_reference);
if (CTy->isRValueReference())
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
uint64_t Size = CTy->getSizeInBits() >> 3;
uint16_t Tag = Buffer.getTag();
switch (Tag) {
case dwarf::DW_TAG_array_type:
constructArrayTypeDIE(Buffer, CTy);
break;
case dwarf::DW_TAG_enumeration_type:
constructEnumTypeDIE(Buffer, CTy);
break;
case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_class_type: {
// Emit the discriminator for a variant part.
DIDerivedType *Discriminator = nullptr;
if (Tag == dwarf::DW_TAG_variant_part) {
Discriminator = CTy->getDiscriminator();
if (Discriminator) {
// DWARF says:
// If the variant part has a discriminant, the discriminant is
// represented by a separate debugging information entry which is
// a child of the variant part entry.
DIE &DiscMember = constructMemberDIE(Buffer, Discriminator);
addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember);
}
}
// Add template parameters to a class, structure or union types.
if (Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy->getTemplateParams());
// Add elements to structure type.
DINodeArray Elements = CTy->getElements();
for (const auto *Element : Elements) {
if (!Element)
continue;
if (auto *SP = dyn_cast<DISubprogram>(Element))
getOrCreateSubprogramDIE(SP);
else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
} else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else if (Tag == dwarf::DW_TAG_variant_part) {
// When emitting a variant part, wrap each member in
// DW_TAG_variant.
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
if (DD->isUnsignedDIType(Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
}
constructMemberDIE(Variant, DDTy);
} else {
constructMemberDIE(Buffer, DDTy);
}
} else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
StringRef SetterName = Property->getSetterName();
if (!SetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
if (unsigned PropertyAttributes = Property->getAttributes())
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
PropertyAttributes);
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
if (Composite->getTag() == dwarf::DW_TAG_variant_part) {
DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
constructTypeDIE(VariantPart, Composite);
}
}
}
if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
if (CTy->getExportSymbols())
addFlag(Buffer, dwarf::DW_AT_export_symbols);
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
// for which it was created.
if (auto *ContainingType = CTy->getVTableHolder())
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add the type's non-standard calling convention.
// DW_CC_pass_by_value/DW_CC_pass_by_reference are introduced in DWARF 5.
if (!Asm->TM.Options.DebugStrictDwarf || DD->getDwarfVersion() >= 5) {
uint8_t CC = 0;
if (CTy->isTypePassByValue())
CC = dwarf::DW_CC_pass_by_value;
else if (CTy->isTypePassByReference())
CC = dwarf::DW_CC_pass_by_reference;
if (CC)
addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
CC);
}
break;
}
default:
break;
}
// Add name if not anonymous or intermediate type.
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized).
// Ignore the size if it's a non-enum forward decl.
// TODO: Do we care about size for enum forward declarations?
if (Size &&
(!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
if (CTy->isForwardDecl())
addFlag(Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
if (!CTy->isForwardDecl())
addSourceLine(Buffer, CTy);
// No harm in adding the runtime language to the declaration.
unsigned RLang = CTy->getRuntimeLang();
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
// Add align info if available.
if (uint32_t AlignInBytes = CTy->getAlignInBytes())
addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
}
void DwarfUnit::constructTemplateTypeParameterDIE(
DIE &Buffer, const DITemplateTypeParameter *TP) {
DIE &ParamDIE =
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists; it could be void and therefore have no type.
if (TP->getType())
addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
if (TP->isDefault() && (DD->getDwarfVersion() >= 5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
}
void DwarfUnit::constructTemplateValueParameterDIE(
DIE &Buffer, const DITemplateValueParameter *VP) {
DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
// Add the type if there is one; template template parameters and template
// parameter packs will not have a type.
if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
if (VP->isDefault() && (DD->getDwarfVersion() >= 5))
addFlag(ParamDIE, dwarf::DW_AT_default_value);
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
addConstantValue(ParamDIE, CI, VP->getType());
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// We cannot describe the location of dllimport'd entities: the
// computation of their address requires loads from the IAT.
if (!GV->hasDLLImportStorageClass()) {
// For declaration non-type template parameters (such as global values
// and functions)
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addOpAddress(*Loc, Asm->getSymbol(GV));
// Emit DW_OP_stack_value to use the address as the immediate value of
// the parameter, rather than a pointer to it.
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
}
} else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
cast<MDString>(Val)->getString());
} else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
addTemplateParams(ParamDIE, cast<MDTuple>(Val));
}
}
}
DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
if (DIE *NDie = getDIE(NS))
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
StringRef Name = NS->getName();
if (!Name.empty())
addString(NDie, dwarf::DW_AT_name, NS->getName());
else
Name = "(anonymous namespace)";
DD->addAccelNamespace(*CUNode, Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
return &NDie;
}
DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(M->getScope());
if (DIE *MDie = getDIE(M))
return MDie;
DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M);
if (!M->getName().empty()) {
addString(MDie, dwarf::DW_AT_name, M->getName());
addGlobalName(M->getName(), MDie, M->getScope());
}
if (!M->getConfigurationMacros().empty())
addString(MDie, dwarf::DW_AT_LLVM_config_macros,
M->getConfigurationMacros());
if (!M->getIncludePath().empty())
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
if (!M->getAPINotesFile().empty())
addString(MDie, dwarf::DW_AT_LLVM_apinotes, M->getAPINotesFile());
if (M->getFile())
addUInt(MDie, dwarf::DW_AT_decl_file, None,
getOrCreateSourceID(M->getFile()));
if (M->getLineNo())
addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
if (M->getIsDecl())
addFlag(MDie, dwarf::DW_AT_declaration);
return &MDie;
}
DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
if (DIE *SPDie = getDIE(SP))
return SPDie;
if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
// Build the decl now to ensure it precedes the definition.
getOrCreateSubprogramDIE(SPDecl);
}
}
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
// Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances.
if (SP->isDefinition())
return &SPDie;
static_cast<DwarfUnit *>(SPDie.getUnit())
->applySubprogramAttributes(SP, SPDie);
return &SPDie;
}
bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
DIE &SPDie, bool Minimal) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (auto *SPDecl = SP->getDeclaration()) {
if (!Minimal) {
DITypeRefArray DeclArgs, DefinitionArgs;
DeclArgs = SPDecl->getType()->getTypeArray();
DefinitionArgs = SP->getType()->getTypeArray();
if (DeclArgs.size() && DefinitionArgs.size())
if (DefinitionArgs[0] != NULL && DeclArgs[0] != DefinitionArgs[0])
addType(SPDie, DefinitionArgs[0]);
DeclDie = getDIE(SPDecl);
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
// Look at the Decl's linkage name only if we emitted it.
if (DD->useAllLinkageNames())
DeclLinkageName = SPDecl->getLinkageName();
unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
unsigned DefID = getOrCreateSourceID(SP->getFile());
if (DeclID != DefID)
addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
if (SP->getLine() != SPDecl->getLine())
addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
}
}
// Add function template parameters.
addTemplateParams(SPDie, SP->getTemplateParams());
// Add the linkage name if we have one and it isn't in the Decl.
StringRef LinkageName = SP->getLinkageName();
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
if (DeclLinkageName.empty() &&
// Always emit it for abstract subprograms.
(DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
addLinkageName(SPDie, LinkageName);
if (!DeclDie)
return false;
// Refer to the function declaration where all the other attributes will be
// found.
addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
return true;
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPAttributes) {
// If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
// and its source location.
bool SkipSPSourceLocation = SkipSPAttributes &&
!CUNode->getDebugInfoForProfiling();
if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie, SkipSPAttributes))
return;
// Constructors and operators for anonymous aggregates do not have names.
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
// Skip the rest of the attributes under -gmlt to save space.
if (SkipSPAttributes)
return;
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
if (SP->isPrototyped() &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
if (SP->isObjCDirect())
addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct);
unsigned CC = 0;
DITypeRefArray Args;
if (const DISubroutineType *SPTy = SP->getType()) {
Args = SPTy->getTypeArray();
CC = SPTy->getCC();
}
// Add a DW_AT_calling_convention if this has an explicit convention.
if (CC && CC != dwarf::DW_CC_normal)
addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC);
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
if (auto Ty = Args[0])
addType(SPDie, Ty);
unsigned VK = SP->getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
if (SP->getVirtualIndex() != -1u) {
DIELoc *Block = getDIELoc();
addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
}
ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
}
if (!SP->isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
constructSubprogramArguments(SPDie, Args);
}
addThrownTypes(SPDie, SP->getThrownTypes());
if (SP->isArtificial())
addFlag(SPDie, dwarf::DW_AT_artificial);
if (!SP->isLocalToUnit())
addFlag(SPDie, dwarf::DW_AT_external);
if (DD->useAppleExtensionAttributes()) {
if (SP->isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
}
if (SP->isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
if (SP->isNoReturn())
addFlag(SPDie, dwarf::DW_AT_noreturn);
if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP->isPrivate())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else if (SP->isPublic())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
if (SP->isMainSubprogram())
addFlag(SPDie, dwarf::DW_AT_main_subprogram);
if (SP->isPure())
addFlag(SPDie, dwarf::DW_AT_pure);
if (SP->isElemental())
addFlag(SPDie, dwarf::DW_AT_elemental);
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
addFlag(SPDie, dwarf::DW_AT_deleted);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
DIE *IndexTy) {
DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
// The LowerBound value defines the lower bound, which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
// Count == -1 then the array is unbounded and we do not emit
// DW_AT_lower_bound and DW_AT_count attributes.
int64_t DefaultLowerBound = getDefaultLowerBound();
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DW_Subrange, Attr, *VarDIE);
} else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
} else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
if (Attr == dwarf::DW_AT_count) {
if (BI->getSExtValue() != -1)
addUInt(DW_Subrange, Attr, None, BI->getSExtValue());
} else if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
BI->getSExtValue() != DefaultLowerBound)
addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue());
}
};
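// Illustrative example (not from this change): for a C array such as
// `int a[10]`, the front end typically provides a constant count of 10 with
// the default lower bound of 0, so DW_AT_count is emitted and
// DW_AT_lower_bound is omitted as redundant.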
AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
AddBoundTypeEntry(dwarf::DW_AT_count, SR->getCount());
AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
}
void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
const DIGenericSubrange *GSR,
DIE *IndexTy) {
DIE &DwGenericSubrange =
createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer);
addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy);
int64_t DefaultLowerBound = getDefaultLowerBound();
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DIGenericSubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
} else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
if (BE->isConstant() &&
DIExpression::SignedOrUnsignedConstant::SignedConstant ==
*BE->isConstant()) {
if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
BE->getElement(1));
} else {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize());
}
}
};
AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound());
AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount());
AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound());
AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride());
}
DIE *DwarfUnit::getIndexTyDie() {
if (IndexTyDie)
return IndexTyDie;
// Construct an integer type to use for indexes.
IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
StringRef Name = "__ARRAY_SIZE_TYPE__";
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
/// Returns true if the vector's size differs from the sum of sizes of elements
/// the user specified. This can occur if the vector has been rounded up to
/// fit memory alignment constraints.
static bool hasVectorBeenPadded(const DICompositeType *CTy) {
assert(CTy && CTy->isVector() && "Composite type is not a vector");
const uint64_t ActualSize = CTy->getSizeInBits();
// Obtain the size of each element in the vector.
DIType *BaseTy = CTy->getBaseType();
assert(BaseTy && "Unknown vector element type.");
const uint64_t ElementSize = BaseTy->getSizeInBits();
// Locate the number of elements in the vector.
const DINodeArray Elements = CTy->getElements();
assert(Elements.size() == 1 &&
Elements[0]->getTag() == dwarf::DW_TAG_subrange_type &&
"Invalid vector element array, expected one element of type subrange");
const auto Subrange = cast<DISubrange>(Elements[0]);
const auto NumVecElements =
Subrange->getCount()
? Subrange->getCount().get<ConstantInt *>()->getSExtValue()
: 0;
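// Illustrative example (assumed, not from this change): a <3 x float> vector
// has NumVecElements * ElementSize == 96 bits, but a target may round its
// storage size up to ActualSize == 128 bits for alignment, in which case this
// returns true and the padded byte size is emitted instead.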
// Ensure we found the element count and that the actual size is wide
// enough to contain the requested size.
assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size");
return ActualSize != (NumVecElements * ElementSize);
}
void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isVector()) {
addFlag(Buffer, dwarf::DW_AT_GNU_vector);
if (hasVectorBeenPadded(CTy))
addUInt(Buffer, dwarf::DW_AT_byte_size, None,
CTy->getSizeInBits() / CHAR_BIT);
}
if (DIVariable *Var = CTy->getDataLocation()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_data_location, *VarDIE);
} else if (DIExpression *Expr = CTy->getDataLocationExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
}
if (DIVariable *Var = CTy->getAssociated()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE);
} else if (DIExpression *Expr = CTy->getAssociatedExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize());
}
if (DIVariable *Var = CTy->getAllocated()) {
if (auto *VarDIE = getDIE(Var))
addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE);
} else if (DIExpression *Expr = CTy->getAllocatedExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(Expr);
addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize());
}
if (auto *RankConst = CTy->getRankConst()) {
addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata,
RankConst->getSExtValue());
} else if (auto *RankExpr = CTy->getRankExp()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(RankExpr);
addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize());
}
// Emit the element type.
addType(Buffer, CTy->getBaseType());
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
// as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
// Add subranges to array type.
DINodeArray Elements = CTy->getElements();
for (DINode *E : Elements) {
// FIXME: Should this really be such a loose cast?
if (auto *Element = dyn_cast_or_null<DINode>(E)) {
if (Element->getTag() == dwarf::DW_TAG_subrange_type)
constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element),
IdxTy);
}
}
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
const DIType *DTy = CTy->getBaseType();
bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass))
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
auto *Context = CTy->getScope();
bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
isa<DINamespace>(Context) || isa<DICommonBlock>(Context);
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
for (const DINode *E : Elements) {
auto *Enum = dyn_cast_or_null<DIEnumerator>(E);
if (Enum) {
DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum->getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
addConstantValue(Enumerator, Enum->getValue(), IsUnsigned);
if (IndexEnumerators)
addGlobalName(Name, Enumerator, Context);
}
}
}
void DwarfUnit::constructContainingTypeDIEs() {
for (auto &P : ContainingTypeMap) {
DIE &SPDie = *P.first;
const DINode *D = P.second;
if (!D)
continue;
DIE *NDie = getDIE(D);
if (!NDie)
continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
}
}
DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
StringRef Name = DT->getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
addSourceLine(MemberDie, DT);
if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
// For C++, virtual base classes are not at a fixed offset. Use the following
// expression to extract the appropriate offset from the vtable.
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
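// Reading the opcode sequence below: DW_OP_dup copies the object address,
// DW_OP_deref loads the vtable pointer, DW_OP_constu/DW_OP_minus step back
// by Offset to the virtual-base offset slot, DW_OP_deref loads that offset,
// and DW_OP_plus adds it to the duplicated object address.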
DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = DD->getBaseTypeSize(DT);
uint32_t AlignInBytes = DT->getAlignInBytes();
uint64_t OffsetInBytes;
bool IsBitfield = FieldSize && Size != FieldSize;
if (IsBitfield) {
// Handle bitfield, assume bytes are 8 bits.
if (DD->useDWARF2Bitfields())
addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
uint64_t Offset = DT->getOffsetInBits();
// We can't use DT->getAlignInBits() here: AlignInBits for member type
// is non-zero if and only if alignment was forced (e.g. _Alignas()),
// which can't be done with bitfields. Thus we use FieldSize here.
uint32_t AlignInBits = FieldSize;
uint32_t AlignMask = ~(AlignInBits - 1);
// The bits from the start of the storage unit to the start of the field.
uint64_t StartBitOffset = Offset - (Offset & AlignMask);
// The byte offset of the field's aligned storage unit inside the struct.
OffsetInBytes = (Offset - StartBitOffset) / 8;
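// Worked example (illustrative): with FieldSize == 32 and Offset == 37,
// AlignMask == ~31, StartBitOffset == 37 - (37 & ~31) == 5, and
// OffsetInBytes == (37 - 5) / 8 == 4, i.e. the field lives in the second
// 32-bit storage unit of the record.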
if (DD->useDWARF2Bitfields()) {
uint64_t HiMark = (Offset + FieldSize) & AlignMask;
uint64_t FieldOffset = (HiMark - FieldSize);
Offset -= FieldOffset;
// Maybe we need to work from the other end.
if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
OffsetInBytes = FieldOffset >> 3;
} else {
addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset);
}
} else {
// This is not a bitfield.
OffsetInBytes = DT->getOffsetInBits() / 8;
if (AlignInBytes)
addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
}
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
} else if (!IsBitfield || DD->useDWARF2Bitfields()) {
// In DWARF v3, DW_FORM_data4/8 in DW_AT_data_member_location are
// interpreted as location-list pointers. Interpreting constants as
// pointers is not expected, so we use DW_FORM_udata to encode the
// constants here.
if (DD->getDwarfVersion() == 3)
addUInt(MemberDie, dwarf::DW_AT_data_member_location,
dwarf::DW_FORM_udata, OffsetInBytes);
else
addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
OffsetInBytes);
}
}
if (DT->isProtected())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT->isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else if (DT->isPublic())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT->isVirtual())
addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
if (DINode *PNode = DT->getObjCProperty())
if (DIE *PDie = getDIE(PNode))
addAttribute(MemberDie, dwarf::DW_AT_APPLE_property,
dwarf::DW_FORM_ref4, DIEEntry(*PDie));
if (DT->isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
return MemberDie;
}
DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
if (!DT)
return nullptr;
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
if (DIE *StaticMemberDIE = getDIE(DT))
return StaticMemberDIE;
DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
const DIType *Ty = DT->getBaseType();
addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
addSourceLine(StaticMemberDIE, DT);
addFlag(StaticMemberDIE, dwarf::DW_AT_external);
addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
// FIXME: We could omit private if the parent is a class_type, and
// public if the parent is something else.
if (DT->isProtected())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT->isPrivate())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else if (DT->isPublic())
addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
addConstantValue(StaticMemberDIE, CI, Ty);
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
if (uint32_t AlignInBytes = DT->getAlignInBytes())
addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
return &StaticMemberDIE;
}
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
if (!DD->useSectionsAsReferences())
EndLabel = Asm->emitDwarfUnitLength(
isDwoUnit() ? "debug_info_dwo" : "debug_info", "Length of Unit");
else
Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
"Length of Unit");
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
Asm->emitInt16(Version);
// DWARF v5 reorders the address size and adds a unit type.
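// (For reference: a v4 unit header is length, version, abbrev offset,
// address size; a v5 header is length, version, unit type, address size,
// abbrev offset.)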
if (Version >= 5) {
Asm->OutStreamer->AddComment("DWARF Unit Type");
Asm->emitInt8(UT);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
if (Version <= 4) {
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->emitInt8(Asm->MAI->getCodePointerSize());
}
}
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
DwarfUnit::emitCommonHeader(UseOffsets,
DD->useSplitDwarf() ? dwarf::DW_UT_split_type
: dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
}
void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
addAttribute(Die, Attribute, DD->getDwarfSectionOffsetForm(),
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
else
addSectionDelta(Die, Attribute, Label, Sec);
}
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
return DD->useSplitDwarf();
}
void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
getCU().addGlobalNameForTypeUnit(Name, Context);
}
void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
getCU().addGlobalTypeUnitType(Ty, Context);
}
const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections())
return nullptr;
if (isDwoUnit())
return nullptr;
return getSection()->getBeginSymbol();
}
void DwarfUnit::addStringOffsetsStart() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base,
DU->getStringOffsetsStartSym(),
TLOF.getDwarfStrOffSection()->getBeginSymbol());
}
void DwarfUnit::addRnglistsBase() {
assert(DD->getDwarfVersion() >= 5 &&
"DW_AT_rnglists_base requires DWARF version 5 or later");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base,
DU->getRnglistsTableBaseSym(),
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addFlag(D, dwarf::DW_AT_declaration);
StringRef Name = CTy->getName();
if (!Name.empty())
addString(D, dwarf::DW_AT_name, Name);
getCU().createTypeDIE(CTy);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 4bbc3d163089..248ef6c23974 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -1,541 +1,540 @@
//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Insert hardware loop intrinsics into loops which are deemed profitable by
/// the target, by querying TargetTransformInfo. A hardware loop consists of
/// two intrinsics: one, outside the loop, to set the loop iteration count and
/// another, in the exit block, to decrement the counter. The decremented value
/// can either be carried through the loop via a phi or handled in some opaque
/// way by the target.
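///
/// A rough sketch of the resulting IR shape (illustrative only; the exact
/// intrinsics used depend on the target and on the options below):
///
///   preheader:
///     call void @llvm.set.loop.iterations.i32(i32 %count)
///     br label %header
///   ...
///   latch:
///     %cont = call i1 @llvm.loop.decrement.i32(i32 1)
///     br i1 %cont, label %header, label %exit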
///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#define DEBUG_TYPE "hardware-loops"
#define HW_LOOPS_NAME "Hardware Loop Insertion"
using namespace llvm;
static cl::opt<bool>
ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
cl::desc("Force hardware loops intrinsics to be inserted"));
static cl::opt<bool>
ForceHardwareLoopPHI(
"force-hardware-loop-phi", cl::Hidden, cl::init(false),
cl::desc("Force hardware loop counter to be updated through a phi"));
static cl::opt<bool>
ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
cl::desc("Force allowance of nested hardware loops"));
static cl::opt<unsigned>
LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
cl::desc("Set the loop decrement value"));
static cl::opt<unsigned>
CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
cl::desc("Set the loop counter bitwidth"));
static cl::opt<bool>
ForceGuardLoopEntry(
"force-hardware-loop-guard", cl::Hidden, cl::init(false),
cl::desc("Force generation of loop guard intrinsic"));
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
#ifndef NDEBUG
static void debugHWLoopFailure(const StringRef DebugMsg,
Instruction *I) {
dbgs() << "HWLoops: " << DebugMsg;
if (I)
dbgs() << ' ' << *I;
else
dbgs() << '.';
dbgs() << '\n';
}
#endif
static OptimizationRemarkAnalysis
createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
Value *CodeRegion = L->getHeader();
DebugLoc DL = L->getStartLoc();
if (I) {
CodeRegion = I->getParent();
// If there is no debug location attached to the instruction, fall back to
// using the loop's.
if (I->getDebugLoc())
DL = I->getDebugLoc();
}
OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
R << "hardware-loop not created: ";
return R;
}
namespace {
void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
LLVM_DEBUG(debugHWLoopFailure(Msg, I));
ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
}
using TTI = TargetTransformInfo;
class HardwareLoops : public FunctionPass {
public:
static char ID;
HardwareLoops() : FunctionPass(ID) {
initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
// Try to convert the given Loop into a hardware loop.
bool TryConvertLoop(Loop *L);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
private:
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
const DataLayout *DL = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
const TargetTransformInfo *TTI = nullptr;
DominatorTree *DT = nullptr;
bool PreserveLCSSA = false;
AssumptionCache *AC = nullptr;
TargetLibraryInfo *LibInfo = nullptr;
Module *M = nullptr;
bool MadeChange = false;
};
class HardwareLoop {
// Expand the trip count scev into a value that we can use.
Value *InitLoopCount();
// Insert the set_loop_iteration intrinsic.
Value *InsertIterationSetup(Value *LoopCountInit);
// Insert the loop_decrement intrinsic.
void InsertLoopDec();
// Insert the loop_decrement_reg intrinsic.
Instruction *InsertLoopRegDec(Value *EltsRem);
// If the target requires the counter value to be updated in the loop,
// insert a phi to hold the value. The intended purpose is for use by
// loop_decrement_reg.
PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
// Create a new cmp, that checks the returned value of loop_decrement*,
// and update the exit branch to use it.
void UpdateBranch(Value *EltsRem);
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
UsePHICounter(Info.CounterInReg),
UseLoopGuard(Info.PerformEntryTest) { }
void Create();
private:
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
bool UsePHICounter = false;
bool UseLoopGuard = false;
BasicBlock *BeginBB = nullptr;
};
}
char HardwareLoops::ID = 0;
bool HardwareLoops::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
for (Loop *L : *LI)
if (L->isOutermost())
TryConvertLoop(L);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
AnyChanged |= TryConvertLoop(SL);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
return true; // Stop search.
}
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
if (!ForceHardwareLoops &&
!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
if (CounterBitWidth.getNumOccurrences())
HWLoopInfo.CountType =
IntegerType::get(M->getContext(), CounterBitWidth);
if (LoopDecrement.getNumOccurrences())
HWLoopInfo.LoopDecrement =
ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
MadeChange |= TryConvertLoop(HWLoopInfo);
return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
ForceHardwareLoopPHI)) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
return false;
}
assert(
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
// If we don't have a preheader, then insert one.
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
HWLoop.Create();
++NumHWLoops;
return true;
}
void HardwareLoop::Create() {
LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
Value *LoopCountInit = InitLoopCount();
if (!LoopCountInit) {
reportHWLoopFailure("could not safely create a loop count expression",
"HWLoopNotSafe", ORE, L);
return;
}
Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
UpdateBranch(LoopDec);
} else
InsertLoopDec();
// Run through the basic blocks of the loop and see if any of them have dead
// PHIs that can be removed.
for (auto I : L->blocks())
DeleteDeadPHIs(I);
}
static bool CanGenerateTest(Loop *L, Value *Count) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader->getSinglePredecessor())
return false;
BasicBlock *Pred = Preheader->getSinglePredecessor();
if (!isa<BranchInst>(Pred->getTerminator()))
return false;
auto *BI = cast<BranchInst>(Pred->getTerminator());
if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
return false;
// Check that the icmp is checking for equality of Count and zero and that
// a non-zero value results in entering the loop.
auto ICmp = cast<ICmpInst>(BI->getCondition());
LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
if (!ICmp->isEquality())
return false;
auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
return false;
};
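// Illustrative pattern this is looking for (assumed, not from this change):
//   %cmp = icmp eq i32 %count, 0
//   br i1 %cmp, label %exit, label %preheader
// i.e. an equality compare of Count against zero whose non-zero edge enters
// the loop.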
if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
return false;
unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
if (BI->getSuccessor(SuccIdx) != Preheader)
return false;
return true;
}
Value *HardwareLoop::InitLoopCount() {
LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
// Can we replace a conditional branch with an intrinsic that sets the
// loop counter and tests that it is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
-
if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
+ ExitCount->getType() != CountType)
ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
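// The +1 converts the SCEV exit (backedge-taken) count into the total number
// of iterations expected by the set/test intrinsics inserted below.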
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
- SE.getZero(ExitCount->getType()))) {
+ SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
UseLoopGuard = false;
BasicBlock *BB = L->getLoopPreheader();
if (UseLoopGuard && BB->getSinglePredecessor() &&
cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop, then don't force it and create a
// do-while loop instead.
if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
UseLoopGuard = false;
else
BB = Predecessor;
}
if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
<< *ExitCount << "\n");
return nullptr;
}
Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter-setting
// intrinsic. But, in the case of the 'test and set' form, we may fall back to
// just the 'set' form, in which case the insertion block is most likely
// different. It means there will be instruction(s) in a block that possibly
// aren't needed. The isLoopEntryGuardedByCond check is trying to avoid this
// issue, but it doesn't appear to work in all cases.
UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
<< " - Expanded Count in " << BB->getName() << "\n"
<< " - Will insert set counter intrinsic into: "
<< BeginBB->getName() << "\n");
return Count;
}
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
: (UsePhi ? Intrinsic::start_loop_iterations
: Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit);
// Use the return value of the intrinsic to control the entry of the loop.
if (UseLoopGuard) {
assert((isa<BranchInst>(BeginBB->getTerminator()) &&
cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
"Expected conditional branch");
Value *SetCount =
UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
LoopGuard->setCondition(SetCount);
if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
LoopGuard->swapSuccessors();
}
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
<< "\n");
if (UsePhi && UseLoopGuard)
LoopSetup = Builder.CreateExtractValue(LoopSetup, 0);
return !UsePhi ? LoopCountInit : LoopSetup;
}
void HardwareLoop::InsertLoopDec() {
IRBuilder<> CondBuilder(ExitBranch);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
LoopDecrement->getType());
Value *Ops[] = { LoopDecrement };
Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
Value *OldCond = ExitBranch->getCondition();
ExitBranch->setCondition(NewCond);
// The false branch must exit the loop.
if (!L->contains(ExitBranch->getSuccessor(0)))
ExitBranch->swapSuccessors();
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
}
Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
IRBuilder<> CondBuilder(ExitBranch);
Function *DecFunc =
Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
{ EltsRem->getType() });
Value *Ops[] = { EltsRem, LoopDecrement };
Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
return cast<Instruction>(Call);
}
PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = ExitBranch->getParent();
IRBuilder<> Builder(Header->getFirstNonPHI());
PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
Index->addIncoming(NumElts, Preheader);
Index->addIncoming(EltsRem, Latch);
LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
return Index;
}
void HardwareLoop::UpdateBranch(Value *EltsRem) {
IRBuilder<> CondBuilder(ExitBranch);
Value *NewCond =
CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
Value *OldCond = ExitBranch->getCondition();
ExitBranch->setCondition(NewCond);
// The false branch must exit the loop.
if (!L->contains(ExitBranch->getSuccessor(0)))
ExitBranch->swapSuccessors();
// The old condition may be dead now, and may have even created a dead PHI
// (the original induction variable).
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4f730b2cf372..dc245f0d7b16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,23435 +1,23436 @@
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "dagcombine"
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
cl::desc("Only use DAG-combiner alias analysis in this"
" function"));
#endif
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
cl::desc("Bypass the profitability model of load slicing"),
cl::init(false));
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
static cl::opt<bool>
EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
cl::desc("DAG combiner enable merging multiple stores "
"into a wider store"));
static cl::opt<unsigned> TokenFactorInlineLimit(
"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
cl::desc("Limit the number of operands to inline for Token Factors"));
static cl::opt<unsigned> StoreMergeDependenceLimit(
"combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the number of times for the same StoreNode and RootNode "
"to bail out in store merging dependence check"));
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
cl::desc("DAG cominber enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
cl::desc("DAG cominber enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
bool DisableGenericCombines;
/// Worklist of all of the nodes that need to be simplified.
///
/// This must behave as a stack -- new nodes to process are pushed onto the
/// back and when processing we pop off of the back.
///
/// The worklist will not contain duplicates but may contain null entries
/// due to nodes being deleted from the underlying DAG.
SmallVector<SDNode *, 64> Worklist;
/// Mapping from an SDNode to its position on the worklist.
///
/// This is used to find and remove nodes from the worklist (by nulling
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
/// This records all nodes attempted to be added to the worklist since we
/// considered a new worklist entry. As we do not add duplicate nodes
/// to the worklist, this is different from the tail of the worklist.
SmallSetVector<SDNode *, 32> PruningList;
/// Set of nodes which have been combined (at least once).
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 32> CombinedNodes;
/// Map from candidate StoreNode to the pair of RootNode and count.
/// The count is used to track how many times we have seen the StoreNode
/// with the same RootNode bail out in dependence check. If we have seen
/// the bail out for the same pair many times over a limit, we won't
/// consider the StoreNode with the same RootNode as store merging
/// candidate again.
DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
void AddUsersToWorklist(SDNode *N) {
for (SDNode *Node : N->uses())
AddToWorklist(Node);
}
/// Convenient shorthand to add a node and all of its users to the worklist.
void AddToWorklistWithUsers(SDNode *N) {
AddUsersToWorklist(N);
AddToWorklist(N);
}
// Prune potentially dangling nodes. This is called after
// any visit to a node, but should also be called during a visit after any
// failed combine which may have created a DAG node.
void clearAddedDanglingWorklistEntries() {
// Check any nodes added to the worklist to see if they are prunable.
while (!PruningList.empty()) {
auto *N = PruningList.pop_back_val();
if (N->use_empty())
recursivelyDeleteUnusedNodes(N);
}
}
SDNode *getNextWorklistEntry() {
// Before we do any work, remove nodes that are not in use.
clearAddedDanglingWorklistEntries();
SDNode *N = nullptr;
// The Worklist holds the SDNodes in order, but it may contain null
// entries.
while (!N && !Worklist.empty()) {
N = Worklist.pop_back_val();
}
if (N) {
bool GoodWorklistEntry = WorklistMap.erase(N);
(void)GoodWorklistEntry;
assert(GoodWorklistEntry &&
"Found a worklist entry without a corresponding map entry!");
}
return N;
}
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
STI(D.getSubtarget().getSelectionDAGInfo()),
Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
MaximumLegalStoreInBits = 0;
// We use the minimum store size here, since that's all we can guarantee
// for the scalable vector types.
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
}
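// For illustration: on a target whose widest legal type is a 128-bit vector
// such as v4i32, the loop above leaves MaximumLegalStoreInBits at 128; for
// scalable vector types only the known minimum size is counted.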
void ConsiderForPruning(SDNode *N) {
// Mark this for potential pruning.
PruningList.insert(N);
}
/// Add N to the worklist, making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Deleted Node added to Worklist");
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
return;
ConsiderForPruning(N);
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
PruningList.remove(N);
StoreRootCountMap.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
return; // Not in the worklist.
// Null out the entry rather than erasing it to avoid a linear operation.
Worklist[It->second] = nullptr;
WorklistMap.erase(It);
}
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
return CombineTo(N, To, 2, AddTo);
}
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
// TODO: For now just pretend it cannot be simplified.
if (Op.getValueType().isScalableVector())
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnesValue(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
// Scalars have size 0 to distinguish from singleton vectors.
SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
/// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
/// \param InVecVT type of the input vector to EVE with bitcasts resolved.
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success SDValue() on failure.
SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
SDValue PromoteIntBinOp(SDValue Op);
SDValue PromoteIntShiftOp(SDValue Op);
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
// Visitation implementation - Implement dag node combining for different
// node types. The semantics are as follows:
// Return Value:
// SDValue.getNode() == 0 - No change was made
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
// otherwise - N should be replaced by the returned Operand.
//
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
SDValue visitADDLike(SDNode *N);
SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitUDIV(SDNode *N);
SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitFunnelShift(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitAssertExt(SDNode *N);
SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitFPOW(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
SDValue visitFMINIMUM(SDNode *N);
SDValue visitFMAXIMUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
SDValue visitSTORE(SDNode *N);
SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
SDValue visitMGATHER(SDNode *N);
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL, SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N);
SDValue foldSelectOfConstants(SDNode *N);
SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC);
SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldSelectOfBinops(SDNode *N);
SDValue foldSextSetcc(SDNode *N);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
SDValue unfoldMaskedMerge(SDNode *N);
SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans);
SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue mergeTruncStores(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx,
bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
bool isAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Try to replace a store and any possibly adjacent stores on
/// consecutive chains with better chains. Return true only if St is
/// replaced.
///
/// Notice that other chains may still be replaced even if the function
/// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
// Helper for findBetterNeighborChains. Walk up the store chain and add
// additional chained stores that do not overlap and can be parallelized.
bool parallelizeChainedStores(StoreSDNode *St);
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
MemOpLink(LSBaseSDNode *N, int64_t Offset)
: MemNode(N), OffsetFromBase(Offset) {}
};
// Classify the origin of a stored value.
enum class StoreSource { Unknown, Constant, Extract, Load };
StoreSource getStoreSource(SDValue StoreVal) {
switch (StoreVal.getOpcode()) {
case ISD::Constant:
case ISD::ConstantFP:
return StoreSource::Constant;
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
case ISD::LOAD:
return StoreSource::Load;
default:
return StoreSource::Unknown;
}
}
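// For example, a store of (Constant 42) or (ConstantFP 1.0) classifies as
// StoreSource::Constant, a store of (extract_vector_elt v, i) as
// StoreSource::Extract, and a store of a freshly loaded value as
// StoreSource::Load; anything else is Unknown and is not considered by the
// per-source merging paths below.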
/// This is a helper function for visitMUL to check the profitability
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
/// and ConstNode is c2.
bool isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &AddNode,
SDValue &ConstNode);
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
/// the type of the loaded value to be extended.
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT);
/// Helper function to calculate whether the given Load/Store can have its
/// width reduced to ExtVT.
bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N);
/// Helper function for mergeConsecutiveStores which merges the component
/// store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
/// This is a helper function for mergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store introducing
/// bitcasts if necessary. \return True if a merged store was created.
bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
/// This is a helper function for mergeConsecutiveStores. Stores that
/// potentially may be merged with St are placed in StoreNodes. RootNode is
/// a chain predecessor to all store candidates.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&Root);
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
/// have indirect dependency through their operands. RootNode is the
/// predecessor to all stores calculated by getStoreMergeCandidates and is
/// used to prune the dependency check. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode);
/// This is a helper function for mergeConsecutiveStores. Given a list of
/// store candidates, find the first N that are consecutive in memory.
/// Returns 0 if there are not at least 2 consecutive stores to try merging.
unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
int64_t ElementSizeBytes) const;
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of constant values.
bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores,
EVT MemVT, SDNode *Root, bool AllowVectors);
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of extracted vector elements.
/// When extracting multiple vector elements, try to store them in one
/// vector store rather than a sequence of scalar stores.
bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *Root);
/// This is a helper function for mergeConsecutiveStores. It is used for
/// store chains that are composed entirely of loaded values.
bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *Root, bool AllowVectors,
bool IsNonTemporalStore, bool IsNonTemporalLoad);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return true if stores were merged.
bool mergeConsecutiveStores(StoreSDNode *St);
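// Illustrative example for mergeConsecutiveStores above: four adjacent i8
// constant stores to p, p+1, p+2 and p+3 may be rewritten as a single i32
// store of the combined constant, when the wider (integer or vector) store is
// acceptable per the helpers above.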
/// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
///
/// \p N needs to be a truncation and its first operand an AND. Other
/// requirements are checked by the function (e.g. that the trunc is
/// single-use); if any are not met, an empty SDValue is returned.
SDValue distributeTruncateThroughAnd(SDNode *N);
/// Helper function to determine whether the target supports operation
/// given by \p Opcode for type \p VT, that is, whether the operation
/// is legal or custom before legalizing operations, and whether it is
/// legal (but not custom) after legalization.
bool hasOperation(unsigned Opcode, EVT VT) {
return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
public:
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
/// Returns a type large enough to hold any valid shift amount - before type
/// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
}
/// This method returns true if we are running before type legalization or
/// if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
/// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue OrigLoad, SDValue ExtLoad,
ISD::NodeType ExtType);
};
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
DC.removeFromWorklist(N);
}
};
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorklistInserter(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
// FIXME: Ideally we could add N to the worklist, but this causes exponential
// compile time costs in large DAGs, e.g. Halide.
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
void DAGCombiner::deleteAndRecombine(SDNode *N) {
removeFromWorklist(N);
// If the operands of this node are only used by the node, they will now be
// dead. Make sure to re-visit them and recursively delete dead nodes.
for (const SDValue &Op : N->ops())
// For an operand generating multiple values, one of the values may
// become dead allowing further simplification (e.g. split index
// arithmetic from an indexed load).
if (Op->hasOneUse() || Op->getNumValues() > 1)
AddToWorklist(Op.getNode());
DAG.DeleteNode(N);
}
// APInts must be the same size for most operations; this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
LHS = LHS.zextOrSelf(Bits);
RHS = RHS.zextOrSelf(Bits);
}
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC, bool MatchStrict) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
if (MatchStrict &&
(N.getOpcode() == ISD::STRICT_FSETCC ||
N.getOpcode() == ISD::STRICT_FSETCCS)) {
LHS = N.getOperand(1);
RHS = N.getOperand(2);
CC = N.getOperand(3);
return true;
}
if (N.getOpcode() != ISD::SELECT_CC ||
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
return false;
if (TLI.getBooleanContents(N.getValueType()) ==
TargetLowering::UndefinedBooleanContent)
return false;
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
if (!ScalarTy.isSimple())
return false;
uint64_t MaskForTy = 0ULL;
switch (ScalarTy.getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xFFULL;
break;
case MVT::i16:
MaskForTy = 0xFFFFULL;
break;
case MVT::i32:
MaskForTy = 0xFFFFFFFFULL;
break;
default:
return false;
}
APInt Val;
if (ISD::isConstantSplatVector(N, Val))
return Val.getLimitedValue() == MaskForTy;
return false;
}
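// For example, a constant splat vector whose elements are all 0xFFFF matches
// ScalarTy == MVT::i16, i.e. the splat acts as a full mask for an i16 value.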
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
return false;
unsigned BitWidth = N.getScalarValueSizeInBits();
for (const SDValue &Op : N->op_values()) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
(Const->isOpaque() && NoOpaques))
return false;
}
return true;
}
// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
if (V.getOpcode() != ISD::BUILD_VECTOR)
return false;
return isConstantOrConstantVector(V, NoOpaques) ||
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
// Determine whether it is safe to split the index off of this indexed load,
// i.e. splitting is enabled and the index is not an opaque target constant.
static bool canSplitIdx(LoadSDNode *LD) {
return MaySplitLoadIndex &&
(LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
!cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
SDValue N0,
SDValue N1) {
// Currently this only tries to ensure we don't undo the GEP splits done by
// CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
// we check if the following transformation would be problematic:
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
if (N0.hasOneUse())
return false;
auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
if (!C1 || !C2)
return false;
const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
return false;
const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
if (CombinedValueIntVal.getBitWidth() > 64)
return false;
const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
for (SDNode *Node : N0->uses()) {
auto LoadStore = dyn_cast<MemSDNode>(Node);
if (LoadStore) {
// Is x[offset2] already not a legal addressing mode? If so then
// reassociating the constants breaks nothing (we test offset2 because
// that's the one we hope to fold into the load or store).
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
EVT VT = LoadStore->getMemoryVT();
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
continue;
// Would x[offset1+offset2] still be a legal addressing mode?
AM.BaseOffs = CombinedValue;
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
return true;
}
}
return false;
}
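// Rough sketch of the check above: for each memory user of the inner add it
// first asks whether "base + offset2" is even a legal addressing mode (if not,
// nothing can be lost), and then whether "base + offset1 + offset2" would
// still be legal; reassociation is reported as potentially harmful only when
// the combined offset is the one that stops being encodable.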
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() != Opc)
return SDValue();
if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode =
DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
return SDValue();
}
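// Examples: (add (add x, 3), 5) folds to (add x, 8) via the constant path,
// while (add (add x, 3), y) with a single-use inner add becomes
// (add (add x, y), 3), pushing the constant outward.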
// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags) {
assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
// Floating-point reassociation is not allowed without loose FP math.
if (N0.getValueType().isFloatingPoint() ||
N1.getValueType().isFloatingPoint())
if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
return SDValue();
if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
return Combined;
if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
return Combined;
return SDValue();
}
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
AddToWorklist(To[i].getNode());
AddUsersToWorklist(To[i].getNode());
}
}
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty())
deleteAndRecombine(N);
return SDValue(N, 0);
}
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace the old value with the new one.
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
dbgs() << '\n');
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorklistWithUsers(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (TLO.Old.getNode()->use_empty())
deleteAndRecombine(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
TLO, 0, AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
CommitTargetLoweringOpt(TLO);
return true;
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDLoc DL(Load);
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
deleteAndRecombine(Load);
AddToWorklist(Trunc.getNode());
}
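// In effect, every value use of the old load now goes through a TRUNCATE of
// the promoted (wider) load's value, and the old load's chain result is
// rewired to the promoted load's chain, so both data and chain dependencies
// are preserved.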
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
SDLoc DL(Op);
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
switch (Opc) {
default: break;
case ISD::AssertSext:
if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
break;
case ISD::AssertZext:
if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
break;
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, DL, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
SDLoc DL(Op);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
// We are always replacing N0/N1's use in N and only need additional
// replacements if there are additional uses.
// Note: We are checking uses of the *nodes* (SDNode) rather than values
// (SDValue) here because the node may reference multiple values
// (for example, the chain value of a load node).
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
// Combine Op here so it is preserved past replacements.
CombineTo(Op.getNode(), RV);
// If operands have a use ordering, make sure we deal with
// predecessor first.
if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
std::swap(NN0, NN1);
}
if (Replace0) {
AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
}
if (Replace1) {
AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
}
return Op;
}
return SDValue();
}
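// Illustration (using the x86 example from the comments): an i16 ADD the
// target marks as undesirable is rebuilt roughly as
// (trunc (add (ext a), (ext b))) in i32, and any operand that was promoted by
// widening a load is patched up via ReplaceLoadWithPromotedLoad.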
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
if (!N0.getNode())
return SDValue();
SDLoc DL(Op);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
// Deal with Op being deleted.
if (Op && Op.getOpcode() != ISD::DELETED_NODE)
return RV;
}
return SDValue();
}
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
}
bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
if (!ISD::isUNINDEXEDLoad(Op.getNode()))
return false;
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return false;
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
SDLoc DL(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
deleteAndRecombine(N);
AddToWorklist(Result.getNode());
return true;
}
return false;
}
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
if (!N->use_empty())
return false;
SmallSetVector<SDNode *, 16> Nodes;
Nodes.insert(N);
do {
N = Nodes.pop_back_val();
if (!N)
continue;
if (N->use_empty()) {
for (const SDValue &ChildN : N->op_values())
Nodes.insert(ChildN.getNode());
removeFromWorklist(N);
DAG.DeleteNode(N);
} else {
AddToWorklist(N);
}
} while (!Nodes.empty());
return true;
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
// Set the instance variables, so that the various visit routines may use them.
Level = AtLevel;
LegalDAG = Level >= AfterLegalizeDAG;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
WorklistInserter AddNodes(*this);
// Add all the dag nodes to the worklist.
for (SDNode &Node : DAG.allnodes())
AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
// While we have a valid worklist entry node, try to combine it.
while (SDNode *N = getNextWorklistEntry()) {
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
if (recursivelyDeleteUnusedNodes(N))
continue;
WorklistRemover DeadNodes(*this);
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
if (LegalDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes)
AddToWorklistWithUsers(LN);
if (!NIsValid)
continue;
}
LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
CombinedNodes.insert(N);
for (const SDValue &ChildN : N->op_values())
if (!CombinedNodes.count(ChildN.getNode()))
AddToWorklist(ChildN.getNode());
SDValue RV = combine(N);
if (!RV.getNode())
continue;
++NodesCombined;
// If we get back the same node we passed in, rather than a new node or
// zero, we know that the node must have defined multiple values and
// CombineTo was used. Since CombineTo takes care of the worklist
// mechanics for us, we have no work to do in this case.
if (RV.getNode() == N)
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist. Omit this if the
// new node is the EntryToken (e.g. if a store managed to get optimized
// out), because re-visiting the EntryToken and its users will not uncover
// any additional opportunities, but there may be a large number of such
// users, potentially causing compile time explosion.
if (RV.getOpcode() != ISD::EntryToken) {
AddToWorklist(RV.getNode());
AddUsersToWorklist(RV.getNode());
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node. This will also take care of adding any
// operands which have lost a user to the worklist.
recursivelyDeleteUnusedNodes(N);
}
// If the root changed (e.g. it was a dead load), update the root.
DAG.setRoot(Dummy.getValue());
DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::SADDSAT:
case ISD::UADDSAT: return visitADDSAT(N);
case ISD::SSUBSAT:
case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
case ISD::SADDO:
case ISD::UADDO: return visitADDO(N);
case ISD::SUBC: return visitSUBC(N);
case ISD::SSUBO:
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: return visitMULFIX(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM:
case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
case ISD::UMULO: return visitMULO(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX: return visitIMINMAX(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
case ISD::FSHL:
case ISD::FSHR: return visitFunnelShift(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::AssertSext:
case ISD::AssertZext: return visitAssertExt(N);
case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::FPOW: return visitFPOW(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
case ISD::FMINIMUM: return visitFMINIMUM(N);
case ISD::FMAXIMUM: return visitFMAXIMUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
}
return SDValue();
}
SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV;
if (!DisableGenericCombines)
RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (!RV.getNode()) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
}
// If still nothing happened, try promoting the operation.
if (!RV.getNode()) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
RV = PromoteIntBinOp(SDValue(N, 0));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
RV = PromoteIntShiftOp(SDValue(N, 0));
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
RV = PromoteExtend(SDValue(N, 0));
break;
case ISD::LOAD:
if (PromoteLoad(SDValue(N, 0)))
RV = SDValue(N, 0);
break;
}
}
// If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
}
}
return RV;
}
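// The final block above acts as a cheap CSE for commutative ops: when visiting
// e.g. (or x, y) while an identical (or y, x) with the same flags already
// exists in the DAG, the existing node is returned instead of keeping both.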
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
return N->getOperand(0);
if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
return N->getOperand(NumOps-1);
for (unsigned i = 1; i < NumOps-1; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
return N->getOperand(i);
}
return SDValue();
}
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
if (N->getNumOperands() == 2) {
if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
return N->getOperand(0);
if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
return N->getOperand(1);
}
// Don't simplify token factors if optnone.
if (OptLevel == CodeGenOpt::None)
return SDValue();
// Don't simplify the token factor if the node itself has too many operands.
if (N->getNumOperands() > TokenFactorInlineLimit)
return SDValue();
// If the sole user is a token factor, we should make sure we have a
// chance to merge them together. This prevents TF chains from inhibiting
// optimizations.
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
AddToWorklist(*(N->use_begin()));
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
// Start out with this token factor.
TFs.push_back(N);
// Iterate through the token factors. The TFs list grows when new token
// factors are encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
// Limit number of nodes to inline, to avoid quadratic compile times.
// We have to add the outstanding Token Factors to Ops, otherwise we might
// drop Ops from the resulting Token Factors.
if (Ops.size() > TokenFactorInlineLimit) {
for (unsigned j = i; j < TFs.size(); j++)
Ops.emplace_back(TFs[j], 0);
// Drop unprocessed Token Factors from TFs, so we do not add them to the
// combiner worklist later.
TFs.resize(i);
break;
}
SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
// redundant.
Changed = true;
break;
case ISD::TokenFactor:
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
Changed = true;
break;
}
LLVM_FALLTHROUGH;
default:
// Only add if it isn't already in the list.
if (SeenOps.insert(Op.getNode()).second)
Ops.push_back(Op);
else
Changed = true;
break;
}
}
}
// Re-visit inlined Token Factors, to clean them up in case they have been
// removed. Skip the first Token Factor, as this is the current node.
for (unsigned i = 1, e = TFs.size(); i < e; i++)
AddToWorklist(TFs[i]);
// Remove Nodes that are chained to another node in the list. Do so
// by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
// early if we find all remaining work is associated with just one operand as
// no further pruning is possible.
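// For example, if one of the Ops, L, is reached while walking up the chain of
// another Op, S, then S already orders after L, so L is redundant and can be
// dropped from the replacement TokenFactor.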
// List of nodes to search through and original Ops from which they originate.
SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
SmallPtrSet<SDNode *, 16> SeenChains;
bool DidPruneOps = false;
unsigned NumLeftToConsider = 0;
for (const SDValue &Op : Ops) {
Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
OpWorkCount.push_back(1);
}
auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
// If this is an Op, we can remove the op from the list. Re-mark any
// search associated with it as being from the current OpNumber.
if (SeenOps.contains(Op)) {
Changed = true;
DidPruneOps = true;
unsigned OrigOpNumber = 0;
while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
OrigOpNumber++;
assert((OrigOpNumber != Ops.size()) &&
"expected to find TokenFactor Operand");
// Re-mark worklist from OrigOpNumber to OpNumber
for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
if (Worklist[i].second == OrigOpNumber) {
Worklist[i].second = OpNumber;
}
}
OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
OpWorkCount[OrigOpNumber] = 0;
NumLeftToConsider--;
}
// Add if it's a new chain
if (SeenChains.insert(Op).second) {
OpWorkCount[OpNumber]++;
Worklist.push_back(std::make_pair(Op, OpNumber));
}
};
for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
// We need to consider at least 2 Ops to be able to prune anything.
if (NumLeftToConsider <= 1)
break;
auto CurNode = Worklist[i].first;
auto CurOpNumber = Worklist[i].second;
assert((OpWorkCount[CurOpNumber] > 0) &&
"Node should not appear in worklist");
switch (CurNode->getOpcode()) {
case ISD::EntryToken:
// Hitting EntryToken is the only way for the search to terminate without
// hitting another operand's search. Prevent us from marking this operand
// considered.
NumLeftToConsider++;
break;
case ISD::TokenFactor:
for (const SDValue &Op : CurNode->op_values())
AddToWorklist(i, Op.getNode(), CurOpNumber);
break;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
case ISD::CopyFromReg:
case ISD::CopyToReg:
AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
break;
default:
if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
break;
}
OpWorkCount[CurOpNumber]--;
if (OpWorkCount[CurOpNumber] == 0)
NumLeftToConsider--;
}
// If we've changed things around then replace token factor.
if (Changed) {
SDValue Result;
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
if (DidPruneOps) {
SmallVector<SDValue, 8> PrunedOps;
// Keep only the Ops that were not reached while walking up another Op's
// chain; the others are already ordered by a remaining Op.
for (const SDValue &Op : Ops) {
if (SeenChains.count(Op.getNode()) == 0)
PrunedOps.push_back(Op);
}
Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
} else {
Result = DAG.getTokenFactor(SDLoc(N), Ops);
}
}
return Result;
}
return SDValue();
}
/// MERGE_VALUES can always be eliminated.
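/// Each result value i of the node is simply replaced with its corresponding
/// operand i, so the MERGE_VALUES node itself can always be deleted.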
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorklistRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
// Do as a single replacement to avoid rewalking use lists.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
Ops.push_back(N->getOperand(i));
DAG.ReplaceAllUsesWith(N, Ops.data());
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
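/// For example, if N is (add BasePtr, 16) and 'Use' is a store addressed by N,
/// this asks the target whether a [reg + 16] addressing mode is legal for the
/// store's memory VT and address space.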
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT;
unsigned AS;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
} else
return false;
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
AM.BaseOffs = -Offset->getSExtValue();
else
// [reg +/- reg]
AM.Scale = 1;
} else
return false;
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
unsigned SelOpNo = 0;
SDValue Sel = BO->getOperand(0);
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
SelOpNo = 1;
Sel = BO->getOperand(1);
}
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
return SDValue();
SDValue CT = Sel.getOperand(1);
if (!isConstantOrConstantVector(CT, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CT))
return SDValue();
SDValue CF = Sel.getOperand(2);
if (!isConstantOrConstantVector(CF, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
// Bail out if any constants are opaque because we can't constant fold those.
// The exception is "and" and "or" with either 0 or -1, in which case we can
// propagate non-constant operands into the select. I.e.:
// and (select Cond, 0, -1), X --> select Cond, 0, X
// or X, (select Cond, -1, 0) --> select Cond, -1, X
auto BinOpcode = BO->getOpcode();
bool CanFoldNonConst =
(BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
(isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
(isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
EVT VT = BO->getValueType(0);
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
SelectOp->setFlags(BO->getFlags());
return SelectOp;
}
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
// Match a constant operand and a zext operand for the math instruction:
// add Z, C
// sub C, Z
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
auto *CN = dyn_cast<ConstantSDNode>(C);
if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
// Match the zext operand as a setcc of a boolean.
if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
Z.getOperand(0).getValueType() != MVT::i1)
return SDValue();
// Match the compare as: setcc (X & 1), 0, eq.
SDValue SetCC = Z.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
SetCC.getOperand(0).getOpcode() != ISD::AND ||
!isOneConstant(SetCC.getOperand(0).getOperand(1)))
return SDValue();
// We are adding/subtracting a constant and an inverted low bit. Turn that
// into a subtract/add of the low bit with incremented/decremented constant:
// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
// sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
EVT VT = C.getValueType();
SDLoc DL(N);
SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Expecting add or sub");
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
ShiftOp.getOpcode() != ISD::SRL)
return SDValue();
// The shift must be of a 'not' value.
SDValue Not = ShiftOp.getOperand(0);
if (!Not.hasOneUse() || !isBitwiseNot(Not))
return SDValue();
// The shift must be moving the sign bit to the least-significant-bit.
EVT VT = ShiftOp.getValueType();
SDValue ShAmt = ShiftOp.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
return SDValue();
// Eliminate the 'not' by adjusting the shift and add/sub constant:
// add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
// sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
SDLoc DL(N);
auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
if (SDValue NewC =
DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
{ConstantOp, DAG.getConstant(1, DL, VT)}))
return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
return SDValue();
}
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
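/// For example, (or x, c) computes the same value as (add x, c) when x and c
/// share no set bits, so the ADD folds below remain valid for such nodes.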
SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
// fold ((A-c1)+c2) -> (A+(c2-c1))
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
SDValue Sub =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
assert(Sub && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
}
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
SDValue Add =
DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
assert(Add && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// add (sext i1 X), 1 -> zext (not i1 X)
// We don't transform this pattern:
// add (zext i1 X), -1 -> sext (not i1 X)
// because most (?) targets generate better code for the zext form.
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
isOneOrOneSplat(N1)) {
SDValue X = N0.getOperand(0);
if ((!LegalOperations ||
(TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
X.getScalarValueSizeInBits() == 1) {
SDValue Not = DAG.getNOT(DL, X, X.getValueType());
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
}
// Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
// equivalent to (add x, c0).
if (N0.getOpcode() == ISD::OR &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
{N1, N0.getOperand(1)}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
}
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate add
if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
// equivalent to (add x, c).
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
return DAG.getNode(ISD::ADD, DL, VT,
DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
return SDValue();
};
if (SDValue Add = ReassociateAddOr(N0, N1))
return Add;
if (SDValue Add = ReassociateAddOr(N1, N0))
return Add;
}
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
// fold ((A-B)+(C-A)) -> (C-B)
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
N0.getOperand(0) == N1.getOperand(1))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N0.getOperand(1));
// fold ((A-B)+(B-C)) -> (A-C)
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
N0.getOperand(1) == N1.getOperand(0))
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N1.getOperand(1));
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
// fold (add (umax X, C), -C) --> (usubsat X, C)
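// This is valid because umax(X, C) - C is X - C when X >= C and 0 otherwise,
// which is exactly the definition of usubsat(X, C).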
if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
return (!Max && !Op) ||
(Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
};
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
/*AllowUndefs*/ true))
return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
N0.getOperand(1));
}
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (isOneOrOneSplat(N1)) {
// fold (add (xor a, -1), 1) -> (sub 0, a)
if (isBitwiseNot(N0))
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
if (N0.getOpcode() == ISD::ADD) {
SDValue A, Xor;
if (isBitwiseNot(N0.getOperand(0))) {
A = N0.getOperand(1);
Xor = N0.getOperand(0);
} else if (isBitwiseNot(N0.getOperand(1))) {
A = N0.getOperand(0);
Xor = N0.getOperand(1);
}
if (Xor)
return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
}
// Look for:
// add (add x, y), 1
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
N0.getOpcode() == ISD::ADD) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
}
}
// (x - y) + -1 -> add (xor y, -1), x
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isAllOnesOrAllOnesSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
}
if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
return Combined;
if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
return Combined;
return SDValue();
}
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
if (SDValue Combined = visitADDLike(N))
return Combined;
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
const APInt &C0 = N0->getConstantOperandAPInt(0);
const APInt &C1 = N1->getConstantOperandAPInt(0);
return DAG.getVScale(DL, VT, C0 + C1);
}
// fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
(N1.getOpcode() == ISD::VSCALE)) {
const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &VS1 = N1->getConstantOperandAPInt(0);
SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
// Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
if (N0.getOpcode() == ISD::STEP_VECTOR &&
N1.getOpcode() == ISD::STEP_VECTOR) {
const APInt &C0 = N0->getConstantOperandAPInt(0);
const APInt &C1 = N1->getConstantOperandAPInt(0);
APInt NewStep = C0 + C1;
return DAG.getStepVector(DL, VT, NewStep);
}
// Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
(N1.getOpcode() == ISD::STEP_VECTOR)) {
const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
const APInt &SV1 = N1->getConstantOperandAPInt(0);
APInt NewStep = SV0 + SV1;
SDValue SV = DAG.getStepVector(DL, VT, NewStep);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
}
return SDValue();
}
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add_sat x, undef) -> -1
if (N0.isUndef() || N1.isUndef())
return DAG.getAllOnesConstant(DL, VT);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold (add_sat c1, c2) -> c3
return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
// If it cannot overflow, transform into an add.
if (Opcode == ISD::UADDSAT)
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
}
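/// Peel away TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers introduced by
/// legalization and return V if it is the carry result (result #1) of a
/// legal-or-custom ADDCARRY/SUBCARRY/UADDO/USUBO node that is either masked to
/// a single bit or uses zero-or-one boolean contents; otherwise return an
/// empty SDValue.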
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
bool Masked = false;
// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
while (true) {
if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
V = V.getOperand(0);
continue;
}
if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
Masked = true;
V = V.getOperand(0);
continue;
}
break;
}
// If this is not a carry, return.
if (V.getResNo() != 1)
return SDValue();
if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
EVT VT = V.getNode()->getValueType(0);
if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
return SDValue();
// If the result is masked, then no matter what kind of bool it is we can
// return. If it isn't, then we need to make sure the bool is represented as
// either 0 or 1 and not some other value.
if (Masked ||
TLI.getBooleanContents(V.getValueType()) ==
TargetLoweringBase::ZeroOrOneBooleanContent)
return V;
return SDValue();
}
/// Given the operands of an add/sub operation, see if the 2nd operand is a
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
/// the opcode and bypass the mask operation.
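/// This works because if the mask's source X is known to be 0 or -1, then
/// (and X, 1) equals -X, so adding the masked value is equivalent to
/// subtracting X (and vice versa for subtraction).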
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
SelectionDAG &DAG, const SDLoc &DL) {
if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
return SDValue();
EVT VT = N0.getValueType();
if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
return SDValue();
// add N0, (and (AssertSext X, i1), 1) --> sub N0, X
// sub N0, (and (AssertSext X, i1), 1) --> add N0, X
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}
/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N0,
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
return V;
// Look for:
// add (add x, 1), y
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
}
// Hoist one-use subtraction by non-opaque constant:
// (x - C) + y -> (x + y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// Hoist one-use subtraction from non-opaque constant:
// (C - x) + y -> (y - x) + C
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
}
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
// rather than 'add 0/-1' (the zext should get folded).
// add (sext i1 Y), X --> sub X, (zext i1 Y)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
// add X, (sextinreg Y i1) -> sub X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
// (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
N1.getResNo() == 0)
return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
// (add X, Carry) -> (addcarry X, 0, Carry)
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
return DAG.getNode(ISD::ADDCARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), N0,
DAG.getConstant(0, DL, VT), Carry);
return SDValue();
}
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
DL, MVT::Glue));
// If it cannot overflow, transform into an add.
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
/**
* Flips a boolean if it is cheaper to compute. If the Force parameter is set,
* then the flip also occurs if computing the inverse is the same cost.
* This function returns an empty SDValue in case it cannot flip the boolean
* without increasing the cost of the computation. If you want to flip a boolean
* no matter what, use DAG.getLogicalNOT.
*/
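// For example, with ZeroOrOneBooleanContent a boolean is stored as 0 or 1, so
// V = (xor X, 1) is the logical negation of X and X itself can be returned as
// the flipped value at no extra cost.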
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI,
bool Force) {
if (Force && isa<ConstantSDNode>(V))
return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
if (V.getOpcode() != ISD::XOR)
return SDValue();
ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
if (!Const)
return SDValue();
EVT VT = V.getValueType();
bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
IsFlip = Const->isAllOnesValue();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
break;
}
if (IsFlip)
return V.getOperand(0);
if (Force)
return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
return SDValue();
}
SDValue DAGCombiner::visitADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SADDO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (addo x, 0) -> x + no carry out
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
if (!IsSigned) {
// If it cannot overflow, transform into an add.
if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
if (SDValue Combined = visitUADDOLike(N0, N1, N))
return Combined;
if (SDValue Combined = visitUADDOLike(N1, N0, N))
return Combined;
}
return SDValue();
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N0.getValueType();
if (VT.isVector())
return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
SDValue Y = N1.getOperand(0);
SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
N1.getOperand(2));
}
// (uaddo X, Carry) -> (addcarry X, 0, Carry)
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
DAG.getConstant(0, SDLoc(N), VT), Carry);
return SDValue();
}
SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
SDLoc DL(N);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (addcarry x, y, false) -> (uaddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
DAG.getConstant(1, DL, VT)),
DAG.getConstant(0, DL, CarryVT));
}
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
return Combined;
return SDValue();
}
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
SDLoc DL(N);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (saddo_carry x, y, false) -> (saddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
}
return SDValue();
}
/**
* If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
* (addcarry X, 0, (addcarry A, B, Z):Carry)
*
* The end result is usually an increase in the number of operations required,
* but because the carry is now linearized, other transforms can kick in and
* optimize the DAG.
*
* Patterns typically look something like
*            (uaddo A, B)
*             /       \
*          Carry      Sum
*            |          \
*            |    (addcarry *, 0, Z)
*            |       /
*             \   Carry
*              |  /
*            (addcarry X, *, *)
*
* But numerous variations exist. Our goal is to identify A, B, X and Z and
* produce a combine with a single path for carry propagation.
*/
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
SDValue X, SDValue Carry0, SDValue Carry1,
SDNode *N) {
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
return SDValue();
if (Carry1.getOpcode() != ISD::UADDO)
return SDValue();
SDValue Z;
/**
* First look for a suitable Z. It will present itself in the form of
* (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
*/
if (Carry0.getOpcode() == ISD::ADDCARRY &&
isNullConstant(Carry0.getOperand(1))) {
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
isOneConstant(Carry0.getOperand(1))) {
EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
} else {
// We couldn't find a suitable Z.
return SDValue();
}
auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
Combiner.AddToWorklist(NewY.getNode());
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
DAG.getConstant(0, DL, X.getValueType()),
NewY.getValue(1));
};
/**
* (uaddo A, B)
* |
* Sum
* |
* (addcarry *, 0, Z)
*/
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
}
/**
* (addcarry A, 0, Z)
* |
* Sum
* |
* (uaddo *, B)
*/
if (Carry1.getOperand(0) == Carry0.getValue(0)) {
return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
}
if (Carry1.getOperand(1) == Carry0.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
}
return SDValue();
}
// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |  PartialCarryOutY     |
//       |   \                   |
//       |    \                 /
//  AddCarrySum |         ______/
//              |        /
//    CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
const TargetLowering &TLI, SDValue Carry0,
SDValue Carry1, SDNode *N) {
if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
return SDValue();
unsigned Opcode = Carry0.getOpcode();
if (Opcode != Carry1.getOpcode())
return SDValue();
if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
return SDValue();
// Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
// carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
// the above ASCII art.)
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
std::swap(Carry0, Carry1);
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
return SDValue();
// The carry-in value must be on the right-hand side for subtraction.
unsigned CarryInOperandNum =
Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
return SDValue();
SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();
// Verify that the carry/borrow in is plausibly a carry/borrow bit.
// TODO: make getAsCarry() aware of how partial carries are merged.
if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
CarryIn = CarryIn.getOperand(0);
if (CarryIn.getValueType() != MVT::i1)
return SDValue();
SDLoc DL(N);
SDValue Merged =
DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
Carry0.getOperand(1), CarryIn);
// Note that because we have proven that the result of the UADDO/USUBO of A
// and B feeds into the UADDO/USUBO that handles the carry/borrow in, if the
// first UADDO/USUBO overflows, the second one cannot. For example, consider
// 8-bit numbers where 0xFF is the maximum value.
//
// 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
// 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
//
// This is important because it means that OR and XOR can be used to merge
// carry flags; and that AND can return a constant zero.
//
// TODO: match other operations that can merge flags (ADD, etc)
DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
if (N->getOpcode() == ISD::AND)
return DAG.getConstant(0, DL, MVT::i1);
return Merged.getValue(1);
}
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
if (isBitwiseNot(N0))
if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
SDLoc DL(N);
SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
// Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
// or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
(N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
/**
* When one of the addcarry arguments is itself a carry, we may be facing
* a diamond carry propagation. In that case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
*/
if (auto Y = getAsCarry(TLI, N1)) {
// Because both are carries, Y and Z can be swapped.
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
return R;
if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
return R;
}
return SDValue();
}
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
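// For example, with SrcVT = i32 and DstVT = i16, if the upper 16 bits of LHS
// are known to be zero this emits
// (usubsat (trunc LHS), (trunc (umin RHS, 0xFFFF))).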
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &DL) {
assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
"Illegal truncation");
if (DstVT == SrcVT)
return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
// If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
// clamping RHS.
APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
DstVT.getScalarSizeInBits());
if (!DAG.MaskedValueIsZero(LHS, UpperBits))
return SDValue();
SDValue SatLimit =
DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
DstVT.getScalarSizeInBits()),
DL, SrcVT);
RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}
// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
// usubsat(a,b), optionally as a truncated type.
SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
if (N->getOpcode() != ISD::SUB ||
!(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
return SDValue();
EVT SubVT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Try to find umax(a,b) - b or a - umin(a,b) patterns
// that may be converted to usubsat(a,b).
if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
SDValue MaxLHS = Op0.getOperand(0);
SDValue MaxRHS = Op0.getOperand(1);
if (MaxLHS == Op1)
return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
if (MaxRHS == Op1)
return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
}
if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
SDValue MinLHS = Op1.getOperand(0);
SDValue MinRHS = Op1.getOperand(1);
if (MinLHS == Op0)
return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
if (MinRHS == Op0)
return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
}
// sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
if (Op1.getOpcode() == ISD::TRUNCATE &&
Op1.getOperand(0).getOpcode() == ISD::UMIN &&
Op1.getOperand(0).hasOneUse()) {
SDValue MinLHS = Op1.getOperand(0).getOperand(0);
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
DAG, SDLoc(N));
if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
DAG, SDLoc(N));
}
return SDValue();
}
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
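// Specifically, for vector types the zero constant is only emitted if we are
// before legalization or BUILD_VECTOR is legal for VT; otherwise an empty
// SDValue is returned.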
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG, bool LegalOperations) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
if (N1C) {
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
if (isNullOrNullSplat(N0)) {
unsigned BitWidth = VT.getScalarSizeInBits();
// Right-shifting everything out but the sign bit followed by negation is
// the same as flipping arithmetic/logical shift type without the negation:
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
}
}
// 0 - X --> 0 if the sub is NUW.
if (N->getFlags().hasNoUnsignedWrap())
return N0;
if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
// N1 is either 0 or the minimum signed value. If the sub is NSW, then
// N1 must be 0 because negating the minimum signed value is undefined.
if (N->getFlags().hasNoSignedWrap())
return N0;
// 0 - X --> X if X is 0 or the minimum signed value.
return N1;
}
// Convert 0 - abs(x).
SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
!TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
TLI.expandABS(N1.getNode(), Result, DAG, true))
return Result;
// Fold neg(splat(neg(x))) -> splat(x)
if (VT.isVector()) {
SDValue N1S = DAG.getSplatValue(N1, true);
if (N1S && N1S.getOpcode() == ISD::SUB &&
isNullConstant(N1S.getOperand(0))) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
}
}
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (isAllOnesOrAllOnesSplat(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (A - (0-B)) -> A+B
if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold (A+C1)-C2 -> A+(C1-C2)
if (N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
}
// fold (A-C1)-C2 -> A-(C1+C2)
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
// fold (c1-A)-c2 -> (c1-c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
SDValue NewC =
DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(1));
// fold ((A+(C+B))-B) -> A+C
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
// fold ((A-(B-C))-C) -> A-B
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
// fold (A-(B-C)) -> A+(C-B)
if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
N1.getOperand(0)));
// A - (A & B) -> A & (~B)
if (N1.getOpcode() == ISD::AND) {
SDValue A = N1.getOperand(0);
SDValue B = N1.getOperand(1);
if (A != N0)
std::swap(A, B);
if (A == N0 &&
(N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
SDValue InvB =
DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, A, InvB);
}
}
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
if (N1.getOperand(1).getOpcode() == ISD::SUB &&
isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
N1.getOperand(0),
N1.getOperand(1).getOperand(1));
return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
}
}
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
if (SDValue V = foldSubToUSubSat(VT, N))
return V;
// (x - y) - 1 -> add (xor y, -1), x
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
}
// Look for:
// sub y, (xor x, -1)
// And if the target does not like this form then turn into:
// add (add x, y), 1
if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
}
// Hoist one-use addition by non-opaque constant:
// (x + C) - y -> (x - y) + C
if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
}
// y - (x + C) -> (y - x) - C
if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
}
// (x - C) - y -> (x - y) - C
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
}
// (C - x) - y -> C - (x + y)
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
}
// If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
// rather than 'sub 0/1' (the sext should get folded).
// sub X, (zext i1 Y) --> add X, (sext i1 Y)
if (N1.getOpcode() == ISD::ZERO_EXTEND &&
N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
TLI.getBooleanContents(VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
}
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
SDValue S0 = N1.getOperand(0);
if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
}
}
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
DL, VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
// canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
APInt NewStep = -N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getStepVector(DL, VT, NewStep));
}
// Prefer an add for more folding potential and possibly better codegen:
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
if (ShAmtC &&
ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
}
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
SDValue X = N1;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
return DAG.getNode(ISD::ADDCARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
Carry);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
// TODO SimplifyVBinOp
// fold (sub_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub_sat x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (sub_sat x, x) -> 0
if (N0 == N1)
return DAG.getConstant(0, DL, VT);
// fold (sub_sat c1, c2) -> c3
if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
return C;
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
return N0;
return SDValue();
}
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
SDValue DAGCombiner::visitSUBO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SSUBO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
// fold (subo x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (subo x, c) -> (addo x, -c)
if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
// fold (subo x, 0) -> x + no borrow
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (sube x, y, false) -> (subc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (subcarry x, y, false) -> (usubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
}
return SDValue();
}
SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (ssubo_carry x, y, false) -> (ssubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
}
return SDValue();
}
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue Scale = N->getOperand(2);
EVT VT = N0.getValueType();
// fold (mulfix x, undef, scale) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// Canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
// fold (mulfix x, 0, scale) -> 0
if (isNullConstant(N1))
return DAG.getConstant(0, SDLoc(N), VT);
return SDValue();
}
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
APInt ConstValue1;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
assert((!N1IsConst ||
ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
"Splat APInt should be element width");
} else {
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst) {
ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
}
}
// fold (mul c1, c2) -> c1*c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1.isOneValue())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
}
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
// Try to transform:
// (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
// mul x, (2^N - 1) --> sub (shl x, N), x
// Examples: x * 33 --> (x << 5) + x
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
// (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
// mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
// mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
// Examples: x * 0x8800 --> (x << 15) + (x << 11)
// x * 0xf800 --> (x << 16) - (x << 11)
// x * -0x8800 --> -((x << 15) + (x << 11))
// x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
// The constant `2` should be treated as (2^0 + 1).
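// (Otherwise its trailing zero would be stripped first and x * 2 would
// decompose as (sub (shl x, 2), (shl x, 1)) instead of the cheaper
// (add (shl x, 0), x).)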
unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
else if ((MulC + 1).isPowerOf2())
MathOp = ISD::SUB;
if (MathOp != ISD::DELETED_NODE) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R =
TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(TZeros, DL, VT)))
: DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
return R;
}
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
if (isConstantOrConstantVector(C3))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr, 0), Y(nullptr, 0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
N0.getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
DAG.getNode(ISD::MUL, SDLoc(N0), VT,
N0.getOperand(0), N1),
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
APInt MulVal;
if (N0.getOpcode() == ISD::STEP_VECTOR)
if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
// Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x, elementwise,
// into and(x, mask).
// We can replace vectors with '0' and '1' factors with a clearing mask.
if (VT.isFixedLengthVector()) {
unsigned NumElts = VT.getVectorNumElements();
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
if (!V || V->isNullValue()) {
ClearMask.push_back(true);
return true;
}
ClearMask.push_back(false);
return V->isOne();
};
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
SDLoc DL(N);
EVT LegalSVT = N1.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
for (unsigned I = 0; I != NumElts; ++I)
if (ClearMask[I])
Mask[I] = Zero;
return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
}
}
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
}
/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
EVT NodeType = Node->getValueType(0);
if (!NodeType.isSimple())
return false;
switch (NodeType.getSimpleVT().SimpleTy) {
default: return false; // No libcall for vector types.
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
return TLI.getLibcallName(LC) != nullptr;
}
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
if (Node->use_empty())
return SDValue(); // This is a dead node, leave it alone.
unsigned Opcode = Node->getOpcode();
bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
// DIVREM can still work on non-legal types if it is lowered to a lib call.
EVT VT = Node->getValueType(0);
if (VT.isVector() || !VT.isInteger())
return SDValue();
if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
return SDValue();
// If DIVREM is going to get expanded into a libcall,
// but there is no libcall available, then don't combine.
if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
!isDivRemLibcallAvailable(Node, isSigned, TLI))
return SDValue();
// If div is legal, it's better to do the normal expansion
unsigned OtherOpcode = 0;
if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
if (TLI.isOperationLegalOrCustom(Opcode, VT))
return SDValue();
} else {
OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
return SDValue();
}
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
// Convert the other matching node(s), too;
// otherwise, the DIVREM may get target-legalized into something
// target-specific that we won't be able to recognize.
unsigned UserOpc = User->getOpcode();
if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
User->getOperand(0) == Op0 &&
User->getOperand(1) == Op1) {
if (!combined) {
if (UserOpc == OtherOpcode) {
SDVTList VTs = DAG.getVTList(VT, VT);
combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
} else if (UserOpc == DivRemOpc) {
combined = SDValue(User, 0);
} else {
assert(UserOpc == Opcode);
continue;
}
}
if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
CombineTo(User, combined);
else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
CombineTo(User, combined.getValue(1));
}
}
return combined;
}
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned Opc = N->getOpcode();
bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// X / undef -> undef
// X % undef -> undef
// X / 0 -> undef
// X % 0 -> undef
// NOTE: This includes vectors where any divisor element is zero/undef.
if (DAG.isUndef(Opc, {N0, N1}))
return DAG.getUNDEF(VT);
// undef / X -> 0
// undef % X -> 0
if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
if (N0C && N0C->isNullValue())
return N0;
// X / X -> 1
// X % X -> 0
if (N0 == N1)
return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
// X / 1 -> X
// X % 1 -> 0
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// it's a 1.
if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
return SDValue();
}
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
SDLoc DL(N);
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
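// (No other dividend has magnitude >= |MIN_SIGNED|, so the quotient is 0
// unless X == MIN_SIGNED, where it is 1.)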
if (N1C && N1C->getAPIntValue().isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
if (SDValue V = visitSDIVLike(N0, N1, N)) {
// If the corresponding remainder node exists, update its users with
// (Dividend - (Quotient * Divisor)).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
{ N0, N1 })) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
AddToWorklist(Sub.getNode());
CombineTo(RemNode, Sub);
}
return V;
}
// sdiv, srem -> sdivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
// true. Otherwise, we break the simplification logic in visitREM().
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
return SDValue();
}
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
unsigned BitWidth = VT.getScalarSizeInBits();
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
if (C->isNullValue() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
if ((-C->getAPIntValue()).isPowerOf2())
return true;
return false;
};
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
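// For example, (sdiv i32 X, 4) becomes the usual round-toward-zero sequence:
//   Sign = sra X, 31; Bias = srl Sign, 30 (3 when X < 0, else 0);
//   Res  = sra (add X, Bias), 2
// with the selects below additionally handling divisors of 1/-1 and negative
// powers of two.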
if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
// Create constants that are functions of the shift amount value.
EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
if (!isConstantOrConstantVector(Inexact))
return SDValue();
// Splat the sign bit into the register
SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
AddToWorklist(Sign.getNode());
// Add (N0 < 0) ? abs(pow2) - 1 : 0 so the shift below rounds toward zero.
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
AddToWorklist(Srl.getNode());
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
AddToWorklist(Add.getNode());
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
AddToWorklist(Sra.getNode());
// Special case: (sdiv X, 1) -> X
// Special Case: (sdiv X, -1) -> 0-X
SDValue One = DAG.getConstant(1, DL, VT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
// If dividing by a positive value, we're done. Otherwise, the result must
// be negated.
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
// FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
return Res;
}
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
return SDValue();
}
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
SDLoc DL(N);
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (SDValue V = visitUDIVLike(N0, N1, N)) {
// If the corresponding remainder node exists, update its users with
// (Dividend - (Quotient * Divisor)).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
{ N0, N1 })) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
AddToWorklist(Sub.getNode());
CombineTo(RemNode, Sub);
}
return V;
}
// udiv, urem -> udivrem
// If the divisor is constant, then return DIVREM only if isIntDivCheap() is
// true. Otherwise, we break the simplification logic in visitREM().
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
return SDValue();
}
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
SDValue LogBase2 = BuildLogBase2(N1, DL);
AddToWorklist(LogBase2.getNode());
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
}
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
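// e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))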
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N10)) {
SDValue LogBase2 = BuildLogBase2(N10, DL);
AddToWorklist(LogBase2.getNode());
EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
AddToWorklist(Trunc.getNode());
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
return SDValue();
}
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
bool isSigned = (Opcode == ISD::SREM);
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
if (SDValue V = simplifyDivRem(N, DAG))
return V;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
// fold (urem x, pow2) -> (and x, pow2-1)
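// e.g. (urem x, 8) -> (and x, 7)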
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
// Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
// speculative DIV must not cause a DIVREM conversion. We guard against this
// by skipping the simplification if isIntDivCheap(). When div is not cheap,
// combine will not return a DIVREM. Regardless, checking cheapness here
// makes sense since the simplification results in fatter code.
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
if (OptimizedDiv.getNode()) {
// If the equivalent Div node also exists, update its users.
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
{ N0, N1 }))
CombineTo(DivNode, OptimizedDiv);
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(OptimizedDiv.getNode());
AddToWorklist(Mul.getNode());
return Sub;
}
}
// srem, urem -> sdivrem, udivrem
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
return SDValue();
}
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhs c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
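// (the high half of sext(x) * 1 is just the sign bits of x)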
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
!VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return DAG.getConstant(0, DL, VT);
}
// fold (mulhu c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhu x, 1) -> 0
if (isOneConstant(N1))
return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue LogBase2 = BuildLogBase2(N1, DL);
SDValue SRLAmt = DAG.getNode(
ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
}
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
!VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
return SDValue();
}
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return the simplified value if a simplification was made, or a null
/// SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp) {
// If the high half is not needed, just compute the low half.
bool HiExists = N->hasAnyUseOfValue(1);
if (!HiExists && (!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
return CombineTo(N, Res, Res);
}
// If the low half is not needed, just compute the high half.
bool LoExists = N->hasAnyUseOfValue(0);
if (!LoExists && (!LegalOperations ||
TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
return CombineTo(N, Res, Res);
}
// If both halves are used, return as it is.
if (LoExists && HiExists)
return SDValue();
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
AddToWorklist(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
return CombineTo(N, LoOpt, LoOpt);
}
if (HiExists) {
SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
AddToWorklist(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
return CombineTo(N, HiOpt, HiOpt);
}
return SDValue();
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// If the type twice as wide is legal, transform this into a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
// (umul_lohi N0, 0) -> (0, 0)
if (isNullConstant(N->getOperand(1))) {
SDValue Zero = DAG.getConstant(0, DL, VT);
return CombineTo(N, Zero, Zero);
}
// (umul_lohi N0, 1) -> (N0, 0)
if (isOneConstant(N->getOperand(1))) {
SDValue Zero = DAG.getConstant(0, DL, VT);
return CombineTo(N, N->getOperand(0), Zero);
}
// If the type twice as wide is legal, transform this into a wider multiply
// plus a shift.
if (VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
DAG.getConstant(SimpleSize, DL,
getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
return CombineTo(N, Lo, Hi);
}
}
return SDValue();
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
bool IsSigned = (ISD::SMULO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold operation with constant operands.
// TODO: Move this to FoldConstantArithmetic when it supports nodes with
// multiple results.
if (N0C && N1C) {
bool Overflow;
APInt Result =
IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
: N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
return CombineTo(N, DAG.getConstant(Result, DL, VT),
DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
}
// canonicalize constant to RHS.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (mulo x, 0) -> 0 + no carry out
if (isNullOrNullSplat(N1))
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
// (mulo x, 2) -> (addo x, x)
if (N1C && N1C->getAPIntValue() == 2)
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
if (IsSigned) {
// A 1 bit SMULO overflows if both inputs are 1.
if (VT.getScalarSizeInBits() == 1) {
SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
return CombineTo(N, And,
DAG.getSetCC(DL, CarryVT, And,
DAG.getConstant(0, DL, VT), ISD::SETNE));
}
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
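// For example, for i32: if each operand is known to fit in 16 signed bits
// (at least 17 sign bits), the product fits in 32 bits and cannot overflow.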
unsigned SignBits = DAG.ComputeNumSignBits(N0);
if (SignBits > 1)
SignBits += DAG.ComputeNumSignBits(N1);
if (SignBits > VT.getScalarSizeInBits() + 1)
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
} else {
KnownBits N1Known = DAG.computeKnownBits(N1);
KnownBits N0Known = DAG.computeKnownBits(N0);
bool Overflow;
(void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
if (!Overflow)
return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
}
return SDValue();
}
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold operation with constant operands.
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
// If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
switch (Opcode) {
case ISD::SMIN: AltOpcode = ISD::UMIN; break;
case ISD::SMAX: AltOpcode = ISD::UMAX; break;
case ISD::UMIN: AltOpcode = ISD::SMIN; break;
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned LogicOpcode = N->getOpcode();
unsigned HandOpcode = N0.getOpcode();
assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
LogicOpcode == ISD::XOR) && "Expected logic opcode");
assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
if (N0.getNumOperands() == 0)
return SDValue();
// FIXME: We should check number of uses of the operands to not increase
// the instruction count for all transforms.
// Handle size-changing casts.
SDValue X = N0.getOperand(0);
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
HandOpcode == ISD::SIGN_EXTEND) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
return SDValue();
// We need matching integer source types.
if (XVT != Y.getValueType())
return SDValue();
// Don't create an illegal op during or after legalization. Don't ever
// create an unsupported vector op.
if ((VT.isVector() || LegalOperations) &&
!TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
!TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
if (HandOpcode == ISD::TRUNCATE) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
return SDValue();
// We need matching source types.
if (XVT != Y.getValueType())
return SDValue();
// Don't create an illegal op during or after legalization.
if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
return SDValue();
// Be extra careful sinking truncate. If it's free, there's no benefit in
// widening a binop. Also, don't create a logic op on an illegal type.
if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
return SDValue();
if (!TLI.isTypeLegal(XVT))
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// For binops SHL/SRL/SRA/AND:
// logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
N0.getOperand(1) == N1.getOperand(1)) {
// If either operand has other uses, this transform is not an improvement.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
}
// Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
if (HandOpcode == ISD::BSWAP) {
// If either operand has other uses, this transform is not an improvement.
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization up until type legalization, before
// LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
// Input types must be integer and the same.
if (XVT.isInteger() && XVT == Y.getValueType() &&
!(VT.isVector() && TLI.isTypeLegal(VT) &&
!XVT.isVector() && !TLI.isTypeLegal(XVT))) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
}
// Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
// Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
// If both shuffles use the same mask, and both shuffle within a single
// vector, then it is worthwhile to move the swizzle after the operation.
// The type-legalizer generates this pattern when loading illegal
// vector types from memory. In many cases this allows additional shuffle
// optimizations.
// There are other cases where moving the shuffle after the xor/and/or
// is profitable even if shuffles don't perform a swizzle.
// If both shuffles use the same mask, and both shuffles have the same first
// or second operand, then it might still be profitable to move the shuffle
// after the xor/and/or operation.
if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
assert(X.getValueType() == Y.getValueType() &&
"Inputs to shuffles are not the same type");
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// instructions.
if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
!SVN0->getMask().equals(SVN1->getMask()))
return SDValue();
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
SDValue ShOp = N0.getOperand(1);
if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
N0.getOperand(0), N1.getOperand(0));
return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
}
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
ShOp = N0.getOperand(0);
if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
N1.getOperand(1));
return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
}
}
return SDValue();
}
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL) {
SDValue LL, LR, RL, RR, N0CC, N1CC;
if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
!isSetCCEquivalent(N1, RL, RR, N1CC))
return SDValue();
assert(N0.getValueType() == N1.getValueType() &&
"Unexpected operand types for bitwise logic op");
assert(LL.getValueType() == LR.getValueType() &&
RL.getValueType() == RR.getValueType() &&
"Unexpected operand types for setcc");
// If we're here post-legalization or the logic op type is not i1, the logic
// op type must match a setcc result type. Also, all folds require new
// operations on the left and right operands, so those types must match.
EVT VT = N0.getValueType();
EVT OpVT = LL.getValueType();
if (LegalOperations || VT.getScalarType() != MVT::i1)
if (VT != getSetCCResultType(OpVT))
return SDValue();
if (OpVT != RL.getValueType())
return SDValue();
ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
bool IsInteger = OpVT.isInteger();
if (LR == RR && CC0 == CC1 && IsInteger) {
bool IsZero = isNullOrNullSplat(LR);
bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
// All bits clear?
bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
// All sign bits clear?
bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
// Any bits set?
bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
// Any sign bits set?
bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
// (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
// (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
// (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
// (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
AddToWorklist(Or.getNode());
return DAG.getSetCC(DL, VT, Or, LR, CC1);
}
// All bits set?
bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
// All sign bits set?
bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
// Any bits clear?
bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
// Any sign bits clear?
bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
// (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
// (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
// (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
// (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
AddToWorklist(And.getNode());
return DAG.getSetCC(DL, VT, And, LR, CC1);
}
}
// TODO: What is the 'or' equivalent of this fold?
// (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
IsInteger && CC0 == ISD::SETNE &&
((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
SDValue One = DAG.getConstant(1, DL, OpVT);
SDValue Two = DAG.getConstant(2, DL, OpVT);
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
AddToWorklist(Add.getNode());
return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
}
// Try more general transforms if the predicates match and the only user of
// the compares is the 'and' or 'or'.
if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
N0.hasOneUse() && N1.hasOneUse()) {
// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
// or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
// Turn compare of constants whose difference is 1 bit into add+and+setcc.
// TODO - support non-uniform vector amounts.
if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
// Match a shared variable operand and 2 non-opaque constant operands.
ConstantSDNode *C0 = isConstOrConstSplat(LR);
ConstantSDNode *C1 = isConstOrConstSplat(RR);
if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
const APInt &CMax =
APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
const APInt &CMin =
APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
// The difference of the constants must be a single bit.
if ((CMax - CMin).isPowerOf2()) {
// and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
// setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
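// e.g. and (setne X, 4), (setne X, 6) --> setne (and (sub X, 4), ~2), 0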
SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, And, Zero, CC0);
}
}
}
}
// Canonicalize equivalent operands to LL == RL.
if (LL == RR && LR == RL) {
CC1 = ISD::getSetCCSwappedOperands(CC1);
std::swap(RL, RR);
}
// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
if (LL == RL && LR == RR) {
ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
: ISD::getSetCCOrOperation(CC0, CC1, OpVT);
if (NewCC != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
TLI.isOperationLegal(ISD::SETCC, OpVT))))
return DAG.getSetCC(DL, VT, LL, LR, NewCC);
}
return SDValue();
}
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
// immediate for an add, but it is legal if its top c2 bits are set,
// transform the ADD so the immediate doesn't need to be materialized
// in a register.
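// (This is safe: setting the top c2 bits of the add immediate only changes
// the top c2 bits of the sum, and those are cleared by the AND anyway since
// (lshr y, c2) has zeros there.)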
APInt ADDC = ADDI->getAPIntValue();
APInt SRLC = SRLI->getAPIntValue();
if (ADDC.getMinSignedBits() <= 64 &&
SRLC.ult(VT.getSizeInBits()) &&
!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
SRLC.getZExtValue());
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
SDLoc DL0(N0);
SDValue NewAdd =
DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
}
}
}
}
}
// Reduce bit extract of low half of an integer to the narrower type.
// (and (srl i64:x, K), KMask) ->
// (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Size = VT.getSizeInBits();
const APInt &AndMask = CAnd->getAPIntValue();
unsigned ShiftBits = CShift->getZExtValue();
// Bail out, this node will probably disappear anyway.
if (ShiftBits == 0)
return SDValue();
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
TLI.isNarrowingProfitable(VT, HalfVT) &&
TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
TLI.isTruncateFree(VT, HalfVT) &&
TLI.isZExtFree(HalfVT, VT)) {
// The isNarrowingProfitable check is to avoid regressions on PPC and
// AArch64, which match a few 64-bit bit insert / bit extract patterns
// on downstream users of this. Those patterns could probably be
// extended to handle extensions mixed in.
SDValue SL(N0);
assert(MaskBits <= Size);
// Extracting the highest bit of the low half.
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
N0.getOperand(0));
SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
}
}
}
}
return SDValue();
}
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT) {
if (!AndC->getAPIntValue().isMask())
return false;
unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LoadN->getMemoryVT();
if (ExtVT == LoadedVT &&
(!LegalOperations ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
// ZEXTLOAD will match without needing to change the size of the value being
// loaded.
return true;
}
// Do not change the width of a volatile or atomic load.
if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
return false;
if (LegalOperations &&
!TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
return false;
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
return false;
return true;
}
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
ISD::LoadExtType ExtType, EVT &MemVT,
unsigned ShAmt) {
if (!LDST)
return false;
// Only allow byte offsets.
if (ShAmt % 8)
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
if (!MemVT.isRound())
return false;
// Don't change the width of a volatile or atomic load.
if (!LDST->isSimple())
return false;
EVT LdStMemVT = LDST->getMemoryVT();
// Bail out when changing the scalable property, since we can't be sure that
// we're actually narrowing here.
if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
return false;
// Verify that we are actually reducing a load width here.
if (LdStMemVT.bitsLT(MemVT))
return false;
// Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt) {
assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
const unsigned ByteShAmt = ShAmt / 8;
const Align LDSTAlign = LDST->getAlign();
const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
LDST->getAddressSpace(), NarrowAlign,
LDST->getMemOperand()->getFlags()))
return false;
}
// It's not possible to generate a constant of extended or untyped type.
EVT PtrType = LDST->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
if (isa<LoadSDNode>(LDST)) {
LoadSDNode *Load = cast<LoadSDNode>(LDST);
// Don't transform one with multiple uses, this would require adding a new
// load.
if (!SDValue(Load, 0).hasOneUse())
return false;
if (LegalOperations &&
!TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
return false;
// For the transform to be legal, the load must produce only two values
// (the value loaded and the chain). Don't transform a pre-increment
// load, for example, which produces an extra value. Otherwise the
// transformation is not equivalent, and the downstream logic to replace
// uses gets things wrong.
if (Load->getNumValues() > 2)
return false;
// If the load that we're shrinking is an extload and we're not just
// discarding the extension we can't simply shrink the load. Bail.
// TODO: It would be possible to merge the extensions in some cases.
if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
return false;
if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
return false;
} else {
assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
StoreSDNode *Store = cast<StoreSDNode>(LDST);
// Can't write outside the original store
if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
return false;
if (LegalOperations &&
!TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
return false;
}
return true;
}
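/// Recursively walk the operands of N (a tree of AND/OR/XOR nodes, extensions
/// and loads), collecting loads that the mask would allow to be narrowed.
/// OR/XOR nodes whose constant operand is not fully covered by the mask are
/// recorded in NodesWithConsts so the constant can be re-masked later. At
/// most one other node is tolerated; it is returned in NodeToMask so the
/// caller can apply the mask to it explicitly.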
bool DAGCombiner::SearchForAndLoads(SDNode *N,
SmallVectorImpl<LoadSDNode*> &Loads,
SmallPtrSetImpl<SDNode*> &NodesWithConsts,
ConstantSDNode *Mask,
SDNode *&NodeToMask) {
// Recursively search through the operands, looking for loads which can be
// narrowed.
for (SDValue Op : N->op_values()) {
if (Op.getValueType().isVector())
return false;
// Some constants may need fixing up later if they are too large.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
NodesWithConsts.insert(N);
continue;
}
if (!Op.hasOneUse())
return false;
switch(Op.getOpcode()) {
case ISD::LOAD: {
auto *Load = cast<LoadSDNode>(Op);
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
// ZEXTLOAD is already small enough.
if (Load->getExtensionType() == ISD::ZEXTLOAD &&
ExtVT.bitsGE(Load->getMemoryVT()))
continue;
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
Loads.push_back(Load);
continue;
}
return false;
}
case ISD::ZERO_EXTEND:
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT VT = Op.getOpcode() == ISD::AssertZext ?
cast<VTSDNode>(Op.getOperand(1))->getVT() :
Op.getOperand(0).getValueType();
// We can accept extending nodes if the mask is wider than or equal in
// width to the original type.
if (ExtVT.bitsGE(VT))
continue;
break;
}
case ISD::OR:
case ISD::XOR:
case ISD::AND:
if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
NodeToMask))
return false;
continue;
}
// Allow one node which will be masked along with any loads found.
if (NodeToMask)
return false;
// Also ensure that the node to be masked only produces one data result.
NodeToMask = Op.getNode();
if (NodeToMask->getNumValues() > 1) {
bool HasValue = false;
for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
if (VT != MVT::Glue && VT != MVT::Other) {
if (HasValue) {
NodeToMask = nullptr;
return false;
}
HasValue = true;
}
}
assert(HasValue && "Node to be masked has no data result?");
}
}
return true;
}
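/// Try to push the AND's mask back up the tree to the leaf loads so that the
/// loads can be narrowed and the AND itself removed. Roughly, e.g.:
///   (and (or (load i32 p), (load i32 q)), 0xff)
/// can become an OR of two narrowed zero-extending loads, at which point the
/// explicit AND is no longer needed.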
bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Mask)
return false;
if (!Mask->getAPIntValue().isMask())
return false;
// No need to do anything if the and directly uses a load.
if (isa<LoadSDNode>(N->getOperand(0)))
return false;
SmallVector<LoadSDNode*, 8> Loads;
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
if (Loads.size() == 0)
return false;
LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fix up the single node we allow in the tree that needs
// masking.
if (FixupNode) {
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
if (And.getOpcode() == ISD::AND)
DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
for (auto *LogicN : NodesWithConsts) {
SDValue Op0 = LogicN->getOperand(0);
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
std::swap(Op0, Op1);
SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
Op1, MaskOp);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
// Create narrow loads.
for (auto *Load : Loads) {
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
if (And.getOpcode() == ISD::AND)
And = SDValue(
DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = ReduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
CombineTo(Load, NewLoad, NewLoad.getValue(1));
}
DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
return true;
}
return false;
}
// Unfold
// x & (-1 'logical shift' y)
// To
// (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
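//
// For example (illustrative only):
//   (and x, (shl -1, y)) --> (shl (srl x, y), y)
//   (and x, (srl -1, y)) --> (srl (shl x, y), y)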
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
assert(N->getOpcode() == ISD::AND);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Do we actually prefer shifts over mask?
if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
return SDValue();
// Try to match (-1 '[outer] logical shift' y)
unsigned OuterShift;
unsigned InnerShift; // The opposite direction to the OuterShift.
SDValue Y; // Shift amount.
auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
if (!M.hasOneUse())
return false;
OuterShift = M->getOpcode();
if (OuterShift == ISD::SHL)
InnerShift = ISD::SRL;
else if (OuterShift == ISD::SRL)
InnerShift = ISD::SHL;
else
return false;
if (!isAllOnesConstant(M->getOperand(0)))
return false;
Y = M->getOperand(1);
return true;
};
SDValue X;
if (matchMask(N1))
X = N0;
else if (matchMask(N0))
X = N1;
else
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
// tmp = x 'opposite logical shift' y
SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
// ret = tmp 'logical shift' y
SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
return T1;
}
/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
/// For a target with a bit test, this is expected to become test + set and save
/// at least 1 instruction.
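/// For example, with C == 3 (illustrative):
///   and (not (srl X, 3)), 1 --> (and X, 8) == 0
/// with the setcc result then zero-extended or truncated to the result type.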
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
// This is probably not worthwhile without a supported type.
EVT VT = And->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(VT))
return SDValue();
// Look through an optional extension and find a 'not'.
// TODO: Should we favor test+set even without the 'not' op?
SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
if (Not.getOpcode() == ISD::ANY_EXTEND)
Not = Not.getOperand(0);
if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
return SDValue();
// Look through an optional truncation. The source operand may not be the same
// type as the original 'and', but that is ok because we are masking off
// everything but the low bit.
SDValue Srl = Not.getOperand(0);
if (Srl.getOpcode() == ISD::TRUNCATE)
Srl = Srl.getOperand(0);
// Match a shift-right by constant.
if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
!isa<ConstantSDNode>(Srl.getOperand(1)))
return SDValue();
// We might have looked through casts that make this transform invalid.
// TODO: If the source type is wider than the result type, do the mask and
// compare in the source type.
const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
unsigned VTBitWidth = VT.getSizeInBits();
if (ShiftAmt.uge(VTBitWidth))
return SDValue();
// Turn this into a bit-test pattern using mask op + setcc:
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Mask = DAG.getConstant(
APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
// x & x --> x
if (N0 == N1)
return N0;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
// fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
N0.hasOneUse() && N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
// For this AND to be a zero extension of the masked load, the elements
// of the BuildVec must mask the bottom bits of the extended element
// type.
if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
uint64_t ElementSize =
LoadVT.getVectorElementType().getScalarSizeInBits();
if (Splat->getAPIntValue().isMask(ElementSize)) {
return DAG.getMaskedLoad(
ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
ISD::ZEXTLOAD, MLoad->isExpandingLoad());
}
}
}
}
}
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
if (VT.isVector())
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::OR &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
// Replace uses of the AND with uses of the Zero extend node.
CombineTo(N, Zext);
// We actually want to replace all uses of the any_extend with the
// zero_extend, to avoid duplicating things. This will later cause this
// AND to be folded.
CombineTo(N0.getNode(), Zext);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
//
// the 'X' node here can either be nothing or an extract_vector_elt to catch
// more cases.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
N0.getOperand(0).getOpcode() == ISD::LOAD &&
N0.getOperand(0).getResNo() == 0) ||
(N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
N0 : N0.getOperand(0) );
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getNullValue(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
SplatBitSize, HasAnyUndefs);
if (IsSplat) {
// Undef bits can contribute to a possible optimisation if set, so
// set them.
SplatValue |= SplatUndef;
// The splat value may be something like "0x00FFFFFF", which means 0 for
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
if (EltBitWidth > SplatBitSize)
for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
Constant = APInt::getAllOnesValue(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
}
// If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
// actually legal and isn't going to get expanded, else this is a false
// optimisation.
bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
Load->getValueType(0),
Load->getMemoryVT());
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
bool B;
switch (Load->getExtensionType()) {
default: B = false; break;
case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
case ISD::ZEXTLOAD:
case ISD::NON_EXTLOAD: B = true; break;
}
if (B && Constant.isAllOnesValue()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
// Fold the AND away. NewLoad may get replaced immediately.
CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
Load->getChain(), Load->getBasePtr(),
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
if (Load->getNumValues() == 3) {
// PRE/POST_INC loads have 3 values.
SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
NewLoad.getValue(2) };
CombineTo(Load, To, 3, true);
} else {
CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
}
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (masked_gather x)) -> (zext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
EVT MemVT = GN0->getMemoryVT();
EVT ScalarVT = MemVT.getScalarType();
if (SDValue(GN0, 0).hasOneUse() &&
isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
SDValue ZExtLoad = DAG.getMaskedGather(
DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
CombineTo(N, ZExtLoad);
AddToWorklist(ZExtLoad.getNode());
// Avoid recheck of N.
return SDValue(N, 0);
}
}
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
if (SDValue Res = ReduceLoadWidth(N)) {
LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
AddToWorklist(N);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
return SDValue(N, 0);
}
}
if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
// loads, can be combined to narrow loads and the AND node can be removed.
// Perform after legalization so that extend nodes will already be
// combined into the loads.
if (BackwardsPropagateMask(N))
return SDValue(N, 0);
}
if (SDValue Combined = visitANDLike(N0, N1, N))
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
// and (sub 0, zext(bool X)), 1 --> zext(bool X)
// and (sub 0, sext(bool X)), 1 --> zext(bool X)
//
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
if (isNullOrNullSplat(N0.getOperand(0))) {
SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return SubRHS;
if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
}
}
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
(ISD::isEXTLoad(N0.getNode()) ||
(ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
unsigned ExtBitSize = N1.getScalarValueSizeInBits();
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
return BSwap;
}
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
if (TLI.hasBitTest(N0, N1))
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
return V;
// Recognize the following pattern:
//
// AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
//
// where bitmask is a mask that clears the upper bits of AndVT. The
// number of bits in bitmask must be a power of two.
auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
if (LHS->getOpcode() != ISD::SIGN_EXTEND)
return false;
auto *C = dyn_cast<ConstantSDNode>(RHS);
if (!C)
return false;
if (!C->getAPIntValue().isMask(
LHS.getOperand(0).getValueType().getFixedSizeInBits()))
return false;
return true;
};
// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
return SDValue();
}
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
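/// For example, for i32 (one of several accepted forms):
///   (or (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8))
///     --> (srl (bswap a), 16)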
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
return SDValue();
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
// Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
std::swap(N0, N1);
if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND) {
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// Also handle 0xffff since the LHS is guaranteed to have zeros there.
// This is needed for X86.
if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
N01C->getZExtValue() != 0xFFFF))
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
}
if (N1.getOpcode() == ISD::AND) {
if (!N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C || N11C->getZExtValue() != 0xFF)
return SDValue();
N1 = N1.getOperand(0);
LookPassAnd1 = true;
}
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N01C || !N11C)
return SDValue();
if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
return SDValue();
// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
SDValue N00 = N0->getOperand(0);
if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
if (!N00.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
if (!N001C || N001C->getZExtValue() != 0xFF)
return SDValue();
N00 = N00.getOperand(0);
LookPassAnd0 = true;
}
SDValue N10 = N1->getOperand(0);
if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
// Also allow 0xFFFF since the bits will be shifted out. This is needed
// for X86.
if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
N101C->getZExtValue() != 0xFFFF))
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
}
if (N00 != N10)
return SDValue();
// Make sure everything beyond the low halfword gets set to zero since the SRL
// 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
if (DemandHighBits && OpSizeInBits > 16) {
// If the left-shift isn't masked out then the only way this is a bswap is
// if all bits beyond the low 8 are 0. In that case the entire pattern
// reduces to a left shift anyway: leave it for other parts of the combiner.
if (!LookPassAnd0)
return SDValue();
// However, if the right shift isn't masked out then it might be because
// it's not needed. See if we can spot that too.
if (!LookPassAnd1 &&
!DAG.MaskedValueIsZero(
N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
return SDValue();
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
if (OpSizeInBits > 16) {
SDLoc DL(N);
Res = DAG.getNode(ISD::SRL, DL, VT, Res,
DAG.getConstant(OpSizeInBits - 16, DL,
getShiftAmountTy(VT)));
}
return Res;
}
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (!N.getNode()->hasOneUse())
return false;
unsigned Opc = N.getOpcode();
if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
return false;
SDValue N0 = N.getOperand(0);
unsigned Opc0 = N0.getOpcode();
if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
return false;
ConstantSDNode *N1C = nullptr;
// SHL or SRL: look upstream for AND mask operand
if (Opc == ISD::AND)
N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
else if (Opc0 == ISD::AND)
N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N1C)
return false;
unsigned MaskByteOffset;
switch (N1C->getZExtValue()) {
default:
return false;
case 0xFF: MaskByteOffset = 0; break;
case 0xFF00: MaskByteOffset = 1; break;
case 0xFFFF:
// In case demanded bits didn't clear the bits that will be shifted out.
// This is needed for X86.
if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
MaskByteOffset = 1;
break;
}
return false;
case 0xFF0000: MaskByteOffset = 2; break;
case 0xFF000000: MaskByteOffset = 3; break;
}
// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
if (Opc == ISD::AND) {
if (MaskByteOffset == 0 || MaskByteOffset == 2) {
// (x >> 8) & 0xff
// (x >> 8) & 0xff0000
if (Opc0 != ISD::SRL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else {
// (x << 8) & 0xff00
// (x << 8) & 0xff000000
if (Opc0 != ISD::SHL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
} else if (Opc == ISD::SHL) {
// (x & 0xff) << 8
// (x & 0xff0000) << 8
if (MaskByteOffset != 0 && MaskByteOffset != 2)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
} else { // Opc == ISD::SRL
// (x & 0xff00) >> 8
// (x & 0xff000000) >> 8
if (MaskByteOffset != 1 && MaskByteOffset != 3)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
if (Parts[MaskByteOffset])
return false;
Parts[MaskByteOffset] = N0.getOperand(0).getNode();
return true;
}
// Match 2 elements of a packed halfword bswap.
static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (N.getOpcode() == ISD::OR)
return isBSwapHWordElement(N.getOperand(0), Parts) &&
isBSwapHWordElement(N.getOperand(1), Parts);
if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
if (!C || C->getAPIntValue() != 16)
return false;
Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
return true;
}
return false;
}
// Match this pattern:
// (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
// And rewrite this to:
// (rotr (bswap A), 16)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
SelectionDAG &DAG, SDNode *N, SDValue N0,
SDValue N1, EVT VT, EVT ShiftAmountTy) {
assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
"MatchBSwapHWordOrAndAnd: expecting i32");
if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
return SDValue();
// TODO: this is too restrictive; lifting this restriction requires more tests
if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
if (!Mask0 || !Mask1)
return SDValue();
if (Mask0->getAPIntValue() != 0xff00ff00 ||
Mask1->getAPIntValue() != 0x00ff00ff)
return SDValue();
SDValue Shift0 = N0.getOperand(0);
SDValue Shift1 = N1.getOperand(0);
if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
return SDValue();
ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
if (!ShiftAmt0 || !ShiftAmt1)
return SDValue();
if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
return SDValue();
if (Shift0.getOperand(0) != Shift1.getOperand(0))
return SDValue();
SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
}
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
getShiftAmountTy(VT)))
return BSwap;
// Try again with commuted operands.
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
getShiftAmountTy(VT)))
return BSwap;
// Look for either
// (or (bswaphpair), (bswaphpair))
// (or (or (bswaphpair), (and)), (and))
// (or (or (and), (bswaphpair)), (and))
SDNode *Parts[4] = {};
if (isBSwapHWordPair(N0, Parts)) {
// (or (or (and), (and)), (or (and), (and)))
if (!isBSwapHWordPair(N1, Parts))
return SDValue();
} else if (N0.getOpcode() == ISD::OR) {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
!(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
} else
return SDValue();
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
SDLoc DL(N);
// fold (or x, undef) -> -1
if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
return DAG.getAllOnesConstant(DL, VT);
if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
if (const ConstantSDNode *N0O1C =
getAsNonOpaqueConstant(N0.getOperand(1))) {
if (const ConstantSDNode *N1O1C =
getAsNonOpaqueConstant(N1.getOperand(1))) {
const APInt &LHSMask = N0O1C->getAPIntValue();
const APInt &RHSMask = N1O1C->getAPIntValue();
if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
}
}
}
// (or (and X, M), (and X, N)) -> (and X, (or M, N))
if (N0.getOpcode() == ISD::AND &&
N1.getOpcode() == ISD::AND &&
N0.getOperand(0) == N1.getOperand(0) &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
}
/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == ISD::AND) {
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
return SDValue();
}
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
// x | x --> x
if (N0 == N1)
return N0;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
// fold (or x, -1) -> -1, vector edition
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
// Avoid folding a node with illegal type.
TLI.isTypeLegal(VT)) {
bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
bool CanFold = true;
int NumElts = VT.getVectorNumElements();
SmallVector<int, 4> Mask(NumElts);
for (int i = 0; i != NumElts; ++i) {
int M0 = SV0->getMaskElt(i);
int M1 = SV1->getMaskElt(i);
// Determine if either index is pointing to a zero vector.
bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
// If one element is zero and the other side is undef, keep undef.
// This also handles the case that both are undef.
if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
Mask[i] = -1;
continue;
}
// Make sure only one of the elements is zero.
if (M0Zero == M1Zero) {
CanFold = false;
break;
}
assert((M0 >= 0 || M1 >= 0) && "Undef index!");
// We have a zero and non-zero element. If the non-zero came from
// SV0 make the index a LHS index. If it came from SV1, make it
// a RHS index. We need to mod by NumElts because we don't care
// which operand it came from in the original shuffles.
Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
}
if (CanFold) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
Mask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
}
}
}
// fold (or c1, c2) -> c1|c2
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0 or c1/c2 are undef.
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
{N1, N0.getOperand(1)})) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(IOR.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
}
}
if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
return Combined;
if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
return Combined;
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// See if this is some rotate idiom.
if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
return Rot;
if (SDValue Load = MatchLoadCombine(N))
return Load;
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If OR can be rewritten into ADD, try combines based on ADD.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
if (SDValue Combined = visitADDLike(N))
return Combined;
return SDValue();
}
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND &&
DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
return Op.getOperand(0);
}
return Op;
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
SDValue &Mask) {
Op = stripConstantMask(DAG, Op, Mask);
if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
Shift = Op;
return true;
}
return false;
}
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
/// (or (add v v) (shrl v bitwidth-1)):
/// expands (add v v) -> (shl v 1)
///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
/// (or (udiv v c0) (shl (udiv v c1) c2)):
/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
/// (or (shl v c0) (shrl (shl v c1) c2)):
/// expands (shl v c0) -> (shl (shl v c1) c3)
///
/// (or (shrl v c0) (shl (shrl v c1) c2)):
/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
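///
/// E.g. (illustrative), for a 32-bit v with c0=24, c1=3, c2=29:
///   (or (mul v 24) (shrl (mul v 3) 29))
/// expands (mul v 24) -> (shl (mul v 3) 3), since 24 == 3 << 3 and
/// 3 + 29 == 32, allowing a rotate of (mul v 3) to be formed.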
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
SDValue ExtractFrom, SDValue &Mask,
const SDLoc &DL) {
assert(OppShift && ExtractFrom && "Empty SDValue");
assert(
(OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
// Value and Type of the shift.
SDValue OppShiftLHS = OppShift.getOperand(0);
EVT ShiftedVT = OppShiftLHS.getValueType();
// Amount of the existing shift.
ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// (add v v) -> (shl v 1)
// TODO: Should this be a general DAG canonicalization?
if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
ExtractFrom.getOpcode() == ISD::ADD &&
ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
ExtractFrom.getOperand(0) == OppShiftLHS &&
OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
DAG.getShiftAmountConstant(1, ShiftedVT, DL));
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
// Find opcode of the needed shift to be extracted from (op0 v c0).
unsigned Opcode = ISD::DELETED_NODE;
bool IsMulOrDiv = false;
// Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
// opcode or its arithmetic (mul or udiv) variant.
auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
return false;
Opcode = NeededShift;
return true;
};
// op0 must be either the needed shift opcode or the mul/udiv equivalent
// that the needed shift can be extracted from.
if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
(OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
return SDValue();
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
ConstantSDNode *ExtractFromCst =
isConstOrConstSplat(ExtractFrom.getOperand(1));
// TODO: We should be able to handle non-uniform constant vectors for these values
// Check that we have constant values.
if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
!OppLHSCst || !OppLHSCst->getAPIntValue() ||
!ExtractFromCst || !ExtractFromCst->getAPIntValue())
return SDValue();
// Compute the shift amount we need to extract to complete the rotate.
const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
if (OppShiftCst->getAPIntValue().ugt(VTWidth))
return SDValue();
APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
// Normalize the bitwidth of the two mul/udiv/shift constant operands.
APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
APInt OppLHSAmt = OppLHSCst->getAPIntValue();
zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
// Now try extract the needed shift from the ExtractFrom op and see if the
// result matches up with the existing shift's LHS op.
if (IsMulOrDiv) {
// Op to extract from is a mul or udiv by a constant.
// Check:
// c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
// c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
NeededShiftAmt.getZExtValue());
APInt ResultAmt;
APInt Rem;
APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
if (Rem != 0 || ResultAmt != OppLHSAmt)
return SDValue();
} else {
// Op to extract from is a shift by a constant.
// Check:
// c2 - (bitwidth(op0 v c0) - c1) == c0
if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
ExtractFromAmt.getBitWidth()))
return SDValue();
}
// Return the expanded shift op that should allow a rotate to be formed.
EVT ShiftVT = OppShift.getOperand(1).getValueType();
EVT ResVT = ExtractFrom.getValueType();
SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
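//
// E.g. with EltSize == 32, Neg == (sub 32, Pos) satisfies the relation, so
// (or (shift1 X, (sub 32, Pos)), (shift2 X, Pos)) can be treated as a rotate
// of X by Pos in direction shift2.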
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
// (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
// So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
// Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
// for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
// Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
// behavior if Pos == 0 (and consequently Neg == EltSize).
//
// We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
// EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
//
// NOTE: We can only do this when matching an AND and not a general
// funnel shift.
unsigned MaskLoBits = 0;
if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
if (NegC->getAPIntValue().getActiveBits() <= Bits &&
((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
Neg = Neg.getOperand(0);
MaskLoBits = Bits;
}
}
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
return false;
ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
return false;
SDValue NegOp1 = Neg.getOperand(1);
// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
MaskLoBits))
Pos = Pos.getOperand(0);
}
}
// The condition we need is now:
//
// (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
// EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
//
// We also need to account for a potential truncation of NegOp1 if the amount
// has already been legalized to a shift amount type.
APInt Width;
if ((Pos == NegOp1) ||
(NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
Width = NegC->getAPIntValue();
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
// (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
// NegC & Mask == (EltSize - PosC) & Mask
// EltSize & Mask == (NegC + PosC) & Mask
else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
Width = PosC->getAPIntValue() + NegC->getAPIntValue();
else
return false;
} else
return false;
// Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
// EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
// fold (or (shl x, (*ext y)),
// (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y) or (rotr x, (sub 32, y))
//
// fold (or (shl x, (*ext (sub 32, y))),
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
/*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
}
return SDValue();
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
EVT VT = N0.getValueType();
unsigned EltBits = VT.getScalarSizeInBits();
// fold (or (shl x0, (*ext y)),
// (srl x1, (*ext (sub 32, y)))) ->
// (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
//
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
}
// When matching the shift+xor cases, we can't easily use the xor'd shift
// amount, so for now just use the PosOpcode case if it's legal.
// TODO: When can we use the NegOpcode case?
if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
if (Op.getOpcode() != BinOpc)
return false;
ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
return Cst && (Cst->getAPIntValue() == Imm);
};
// fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
// -> (fshl x0, x1, y)
if (IsBinOpImm(N1, ISD::SRL, 1) &&
IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
InnerPos == InnerNeg.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
}
// fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
// -> (fshr x0, x1, y)
if (IsBinOpImm(N0, ISD::SHL, 1) &&
IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
InnerNeg == InnerPos.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
}
// fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
// -> (fshr x0, x1, y)
// TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
InnerNeg == InnerPos.getOperand(0) &&
TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
}
}
return SDValue();
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded and promoted types won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT))
return SDValue();
// The target must have at least one rotate/funnel flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
assert(LHS.getValueType() == RHS.getValueType());
if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
}
}
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
// If neither side matched a rotate half, bail
if (!LHSShift && !RHSShift)
return SDValue();
// InstCombine may have combined a constant shl, srl, mul, or udiv with one
// side of the rotate, so try to handle that here. In all cases we need to
// pass the matched shift from the opposite side to compute the opcode and
// needed shift amount to extract. We still want to do this if both sides
// matched a rotate half because one half may be a potential overshift that
// can be broken down (i.e. if InstCombine merged two shl or srl ops into a
// single one).
// Have LHS side of the rotate, try to extract the needed shift from the RHS.
if (LHSShift)
if (SDValue NewRHSShift =
extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
RHSShift = NewRHSShift;
// Have RHS side of the rotate, try to extract the needed shift from the LHS.
if (RHSShift)
if (SDValue NewLHSShift =
extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
LHSShift = NewLHSShift;
// If a side is still missing, nothing else we can do.
if (!RHSShift || !LHSShift)
return SDValue();
// At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
std::swap(LHSMask, RHSMask);
}
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
// iff C1+C2 == EltSizeInBits
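// For illustration, with 32-bit elements:
//   (or (shl x, 24), (srl x, 8)) has C1 + C2 == 24 + 8 == 32, so it becomes
//   (rotl x, 24) (equivalently (rotr x, 8)), target support permitting.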
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
if (IsRotate && (HasROTL || HasROTR))
Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt);
else
Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
SDValue Mask = AllOnes;
if (LHSMask.getNode()) {
SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
}
if (RHSMask.getNode()) {
SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
}
return Res;
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
return SDValue();
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
SDValue RExtOp0 = RHSShiftAmt;
if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
(RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
LExtOp0 = LHSShiftAmt.getOperand(0);
RExtOp0 = RHSShiftAmt.getOperand(0);
}
if (IsRotate && (HasROTL || HasROTR)) {
SDValue TryL =
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
RExtOp0, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
LExtOp0, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
}
SDValue TryL =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
return SDValue();
}
namespace {
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
// For constant zero providers Load is set to nullptr. For memory providers
// Load represents the node which loads the byte from memory.
// ByteOffset is the offset of the byte in the value produced by the load.
LoadSDNode *Load = nullptr;
unsigned ByteOffset = 0;
ByteProvider() = default;
static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
return ByteProvider(Load, ByteOffset);
}
static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
bool isConstantZero() const { return !Load; }
bool isMemory() const { return Load; }
bool operator==(const ByteProvider &Other) const {
return Other.Load == Load && Other.ByteOffset == ByteOffset;
}
private:
ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
: Load(Load), ByteOffset(ByteOffset) {}
};
} // end anonymous namespace
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For every value except the root of the expression, verifies that the value
/// has exactly one use; if it does not, returns None. This way, if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
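///
/// For illustration, given
///   Op = (or (zext i16 (load A) to i32), (shl (zext i16 (load B) to i32), 16))
/// bytes 0-1 of Op come from load A (byte offsets 0 and 1 within A) and bytes
/// 2-3 come from load B: for each byte, the shl or the zext branch contributes
/// constant zero on one side of the 'or', so every byte resolves to a single
/// memory provider.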
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
bool Root = false) {
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
return None;
if (!Root && !Op.hasOneUse())
return None;
assert(Op.getValueType().isScalarInteger() && "can't handle other types");
unsigned BitWidth = Op.getValueSizeInBits();
if (BitWidth % 8 != 0)
return None;
unsigned ByteWidth = BitWidth / 8;
assert(Index < ByteWidth && "invalid index requested");
(void) ByteWidth;
switch (Op.getOpcode()) {
case ISD::OR: {
auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
if (!LHS)
return None;
auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
if (!RHS)
return None;
if (LHS->isConstantZero())
return RHS;
if (RHS->isConstantZero())
return LHS;
return None;
}
case ISD::SHL: {
auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!ShiftOp)
return None;
uint64_t BitShift = ShiftOp->getZExtValue();
if (BitShift % 8 != 0)
return None;
uint64_t ByteShift = BitShift / 8;
return Index < ByteShift
? ByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
Depth + 1);
}
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
SDValue NarrowOp = Op->getOperand(0);
unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
if (NarrowBitWidth % 8 != 0)
return None;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
? Optional<ByteProvider>(ByteProvider::getConstantZero())
: None;
return calculateByteProvider(NarrowOp, Index, Depth + 1);
}
case ISD::BSWAP:
return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
if (NarrowBitWidth % 8 != 0)
return None;
uint64_t NarrowByteWidth = NarrowBitWidth / 8;
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
? Optional<ByteProvider>(ByteProvider::getConstantZero())
: None;
return ByteProvider::getMemory(L, Index);
}
}
return None;
}
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
return i;
}
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
return BW - i - 1;
}
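// For example, with BW == 4 the little-endian byte order is {0, 1, 2, 3} while
// the big-endian byte order is {3, 2, 1, 0}.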
// Check if the byte offsets we are looking at match either a big or a
// little endian value load. Return true for big endian, false for little
// endian, and None if the match failed.
static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
if (Width < 2)
return None;
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
return None;
}
assert((BigEndian != LittleEndian) &&
       "It should be either big endian or little endian");
return BigEndian;
}
static SDValue stripTruncAndExt(SDValue Value) {
switch (Value.getOpcode()) {
case ISD::TRUNCATE:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
return stripTruncAndExt(Value.getOperand(0));
}
return Value;
}
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 0) & 0xFF;
/// p[1] = (val >> 8) & 0xFF;
/// p[2] = (val >> 16) & 0xFF;
/// p[3] = (val >> 24) & 0xFF;
/// =>
/// *((i32)p) = val;
///
/// i8 *p = ...
/// i32 val = ...
/// p[0] = (val >> 24) & 0xFF;
/// p[1] = (val >> 16) & 0xFF;
/// p[2] = (val >> 8) & 0xFF;
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
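///
/// For illustration, a two-store variant handled via the rotate path (a
/// sketch, assuming a little endian target):
///   i16 *p = ...
///   i32 val = ...
///   p[0] = (val >> 16) & 0xFFFF;
///   p[1] = (val >> 0) & 0xFFFF;
///   =>
///   *((i32)p) = ROTR(val, 16);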
SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// The matching looks for "store (trunc x)" patterns that appear early but are
// likely to be replaced by truncating store nodes during combining.
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
if (LegalOperations)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
// TODO: Allow unordered atomics when wider type is legal (see D66309)
EVT MemVT = N->getMemoryVT();
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!N->isSimple() || N->isIndexed())
return SDValue();
// Collect all of the stores in the chain.
SDValue Chain = N->getChain();
SmallVector<StoreSDNode *, 8> Stores = {N};
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
// TODO: We could allow multiple sizes by tracking each stored byte.
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
}
// There is no reason to continue if we do not have at least a pair of stores.
if (Stores.size() < 2)
return SDValue();
// Handle simple types only.
LLVMContext &Context = *DAG.getContext();
unsigned NumStores = Stores.size();
unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
unsigned WideNumBits = NumStores * NarrowNumBits;
EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
return SDValue();
// Check if all bytes of the source value that we are looking at are stored
// to the same base address. Collect offsets from Base address into OffsetMap.
SDValue SourceValue;
SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
for (auto Store : Stores) {
// All the stores store different parts of the combined wide value. A truncate
// is required to get the partial value.
SDValue Trunc = Store->getValue();
if (Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
// Other than the first/last part, a shift operation is required to get the
// offset.
int64_t Offset = 0;
SDValue WideVal = Trunc.getOperand(0);
if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
isa<ConstantSDNode>(WideVal.getOperand(1))) {
// The shift amount must be a constant multiple of the narrow type.
// It is translated to the offset address in the wide source value "y".
//
// x = srl y, ShiftAmtC
// i8 z = trunc x
// store z, ...
uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
Offset = ShiftAmtC / NarrowNumBits;
WideVal = WideVal.getOperand(0);
}
// Stores must share the same source value with different offsets.
// Truncate and extends should be stripped to get the single source value.
if (!SourceValue)
SourceValue = WideVal;
else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
return SDValue();
else if (SourceValue.getValueType() != WideVT) {
if (WideVal.getValueType() == WideVT ||
WideVal.getScalarValueSizeInBits() >
SourceValue.getScalarValueSizeInBits())
SourceValue = WideVal;
// Give up if the source value type is smaller than the store size.
if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
}
// Stores must share the same base address.
BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Remember the first store.
if (ByteOffsetFromBase < FirstOffset) {
FirstStore = Store;
FirstOffset = ByteOffsetFromBase;
}
// Map the offset in the combined value to the store's byte offset, and
// return early if this offset has already been set.
if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
return SDValue();
OffsetMap[Offset] = ByteOffsetFromBase;
}
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
// Check if the pieces of the value are going to the expected places in memory
// to merge the stores.
auto checkOffsets = [&](bool MatchLittleEndian) {
if (MatchLittleEndian) {
for (unsigned i = 0; i != NumStores; ++i)
if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
} else { // MatchBigEndian by reversing loop counter.
for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
return false;
}
return true;
};
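// For illustration, with NumStores == 4, NarrowNumBits == 8 and FirstOffset == 0,
// a little-endian match requires OffsetMap == {0, 1, 2, 3} and a big-endian
// match requires OffsetMap == {3, 2, 1, 0}.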
// Check if the offsets line up for the native data layout of this target.
bool NeedBswap = false;
bool NeedRotate = false;
if (!checkOffsets(Layout.isLittleEndian())) {
// Special-case: check if byte offsets line up for the opposite endian.
if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
NeedBswap = true;
else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
NeedRotate = true;
else
return SDValue();
}
SDLoc DL(N);
if (WideVT != SourceValue.getValueType()) {
assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
"Unexpected store value to merge");
SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
}
// Before legalize we can introduce illegal bswaps/rotates which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// store and byte shuffling instead of several stores and byte shuffling.
if (NeedBswap) {
SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
} else if (NeedRotate) {
assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
}
SDValue NewStore =
DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
FirstStore->getPointerInfo(), FirstStore->getAlign());
// Rely on other DAG combine rules to remove the other individual stores.
DAG.ReplaceAllUsesWith(N, NewStore.getNode());
return NewStore;
}
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
/// i8 *a = ...
/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
/// i32 val = *((i32)a)
///
/// i8 *a = ...
/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
/// i32 val = BSWAP(*((i32)a))
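///
/// For illustration, a partially-zero variant is folded to a zero-extending
/// load (again assuming a little endian target):
///   i8 *a = ...
///   i32 val = a[0] | (a[1] << 8)
///   =>
///   i32 val = ZEXT(*((i16)a))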
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
/// t25: i32 = add t4, Constant:i32<2>
/// t26: i64 = sign_extend t25
/// t27: i64 = add t2, t26
/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
/// t29: i32 = zero_extend t28
/// t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(N->getOpcode() == ISD::OR &&
"Can only match load combining against OR nodes");
// Handles simple types only
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
return IsBigEndianTarget
? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
: littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
Optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
Optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect byte offsets from Base address in ByteOffsets.
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
unsigned ZeroExtendedBytes = 0;
for (int i = ByteWidth - 1; i >= 0; --i) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
if (!P)
return SDValue();
if (P->isConstantZero()) {
// It's OK for the N most significant bytes to be 0, we can just
// zero-extend the load.
if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
return SDValue();
continue;
}
assert(P->isMemory() && "provenance should either be memory or zero");
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
// All loads must share the same chain
SDValue LChain = L->getChain();
if (!Chain)
Chain = LChain;
else if (Chain != LChain)
return SDValue();
// Loads must share the same base address
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
// Calculate the offset of the current byte from the base address
ByteOffsetFromBase += MemoryByteOffset(*P);
ByteOffsets[i] = ByteOffsetFromBase;
// Remember the first byte load
if (ByteOffsetFromBase < FirstOffset) {
FirstByteProvider = P;
FirstOffset = ByteOffsetFromBase;
}
Loads.insert(L);
}
assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
bool NeedsZext = ZeroExtendedBytes > 0;
EVT MemVT =
EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
if (!MemVT.isSimple())
return SDValue();
// Before legalize we can introduce too wide illegal loads which will be later
// split into legal sized loads. This enables us to combine i64 load by i8
// patterns to a couple of i32 loads on 32 bit targets.
if (LegalOperations &&
!TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
MemVT))
return SDValue();
// Check if the bytes of the OR we are looking at match either a big or a
// little endian value load
Optional<bool> IsBigEndian = isBigEndian(
makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
assert(FirstByteProvider && "must be set");
// Ensure that the first byte is loaded from zero offset of the first load.
// So the combined value can be loaded from the first load address.
if (MemoryByteOffset(*FirstByteProvider) != 0)
return SDValue();
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches the pattern; check if we can
// replace it with a single (possibly zero-extended) load and bswap + shift if
// needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
// We do not introduce illegal bswaps when zero-extending as this tends to
// introduce too many arithmetic instructions.
if (NeedsBswap && (LegalOperations || NeedsZext) &&
!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
// If we need to bswap and zero extend, we have to insert a shift. Check that
// it is legal.
if (NeedsBswap && NeedsZext && LegalOperations &&
!TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
SDValue NewLoad =
DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
Chain, FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
if (!NeedsBswap)
return NewLoad;
SDValue ShiftedLoad =
NeedsZext
? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
SDLoc(N), LegalOperations))
: NewLoad;
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |     A     |  |B|
//   ((x ^ y) & m) ^ y
//    |  D  |
// Into:
// (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
// ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
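// For illustration, with x = 0b1010, y = 0b0110, m = 0b1100:
//   ((x ^ y) & m) ^ y  ==  (0b1100 & 0b1100) ^ 0b0110  ==  0b1010
//   (x & m) | (y & ~m) ==   0b1000 | 0b0010             ==  0b1010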
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
assert(N->getOpcode() == ISD::XOR);
// Don't touch 'not' (i.e. where y = -1).
if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
return SDValue();
EVT VT = N->getValueType(0);
// There are 3 commutable operators in the pattern,
// so we have to deal with 8 possible variants of the basic pattern.
SDValue X, Y, M;
auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
if (And.getOpcode() != ISD::AND || !And.hasOneUse())
return false;
SDValue Xor = And.getOperand(XorIdx);
if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
return false;
SDValue Xor0 = Xor.getOperand(0);
SDValue Xor1 = Xor.getOperand(1);
// Don't touch 'not' (i.e. where y = -1).
if (isAllOnesOrAllOnesSplat(Xor1))
return false;
if (Other == Xor0)
std::swap(Xor0, Xor1);
if (Other != Xor1)
return false;
X = Xor0;
Y = Xor1;
M = And.getOperand(XorIdx ? 0 : 1);
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
!matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
return SDValue();
// Don't do anything if the mask is constant. This should not be reachable.
// InstCombine should have already unfolded this pattern, and DAGCombiner
// probably shouldn't produce it either.
if (isa<ConstantSDNode>(M.getNode()))
return SDValue();
// We can transform if the target has AndNot
if (!TLI.hasAndNot(M))
return SDValue();
SDLoc DL(N);
// If Y is a constant, check that 'andn' works with immediates.
if (!TLI.hasAndNot(Y)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N1;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
// fold (xor c1, c2) -> c1^c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate xor
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) &&
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0Opcode) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
if (N0.hasOneUse()) {
// FIXME Can we handle multiple uses? Could we token factor the chain
// results from the new/old setcc?
SDValue SetCC =
DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
CombineTo(N, SetCC);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
recursivelyDeleteUnusedNodes(N0.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
break;
}
}
}
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
SDLoc DL0(N0);
V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
DAG.getConstant(1, DL0, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (neg x)) -> (add X, -1)
// FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
// Y is a constant or the subtract has a single use.
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
isNullConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
DAG.getAllOnesConstant(DL, VT));
}
// fold (not (add X, -1)) -> (neg X)
if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
}
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
AddToWorklist(NotX.getNode());
return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
}
if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
ConstantSDNode *XorC = isConstOrConstSplat(N1);
ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
unsigned BitWidth = VT.getScalarSizeInBits();
if (XorC && ShiftC) {
// Don't crash on an oversized shift. We cannot guarantee that a bogus
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
APInt Ones = APInt::getAllOnesValue(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
// xor (X << ShiftC), XorC --> (not X) << ShiftC
// xor (X >> ShiftC), XorC --> (not X) >> ShiftC
SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
}
}
}
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, DL, VT, S0);
}
}
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (xor (shl 1, x), -1) -> (rotl ~1, x)
// Here is a concrete example of this equivalence:
// i16 x == 14
// i16 shl == 1 << 14 == 16384 == 0b0100000000000000
// i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
//
// =>
//
// i16 ~1 == 0b1111111111111110
// i16 rol(~1, 14) == 0b1011111111111111
//
// Some additional tips to help conceptualize this transform:
// - Try to see the operation as placing a single zero in a value of all ones.
// - There exists no value for x which would allow the result to contain zero.
// - Values of x larger than the bitwidth are undefined and do not require a
// consistent result.
// - Pushing the zero left requires shifting one-bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
N0.getOperand(1));
}
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0Opcode == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
return MM;
// Simplify the expression using non-local knowledge.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
return Combined;
return SDValue();
}
/// If we have a shift-by-constant of a bitwise logic op that itself has a
/// shift-by-constant operand with identical opcode, we may be able to convert
/// that into 2 independent shifts followed by the logic op. This is a
/// throughput improvement.
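///
/// For illustration: (shl (xor (shl X, 2), Y), 3) -> (xor (shl X, 5), (shl Y, 3)),
/// provided the logic op and the inner shift each have no other uses.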
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
// Match a one-use bitwise logic op.
SDValue LogicOp = Shift->getOperand(0);
if (!LogicOp.hasOneUse())
return SDValue();
unsigned LogicOpcode = LogicOp.getOpcode();
if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
LogicOpcode != ISD::XOR)
return SDValue();
// Find a matching one-use shift by constant.
unsigned ShiftOpcode = Shift->getOpcode();
SDValue C1 = Shift->getOperand(1);
ConstantSDNode *C1Node = isConstOrConstSplat(C1);
assert(C1Node && "Expected a shift with constant operand");
const APInt &C1Val = C1Node->getAPIntValue();
auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
const APInt *&ShiftAmtVal) {
if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
return false;
ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
if (!ShiftCNode)
return false;
// Capture the shifted operand and shift amount value.
ShiftOp = V.getOperand(0);
ShiftAmtVal = &ShiftCNode->getAPIntValue();
// Shift amount types do not have to match their operand type, so check that
// the constants are the same width.
if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
return false;
// The fold is not valid if the sum of the shift values exceeds bitwidth.
if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
return false;
return true;
};
// Logic ops are commutative, so check each operand for a match.
SDValue X, Y;
const APInt *C0Val;
if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
Y = LogicOp.getOperand(1);
else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
Y = LogicOp.getOperand(0);
else
return SDValue();
// shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
SDLoc DL(Shift);
EVT VT = Shift->getValueType(0);
EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
}
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
/// shift (binop X, C0), C1
/// And want to transform into:
/// binop (shift X, C1), (shift C0, C1)
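///
/// For illustration: (shl (and X, 0xF0), 8) -> (and (shl X, 8), 0xF000).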
SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
// The inner binop must be one-use, since we want to replace it.
SDValue LHS = N->getOperand(0);
if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
// TODO: This is limited to early combining because it may reveal regressions
// otherwise. But since we just checked a target hook to see if this is
// desirable, that should have filtered out cases where this interferes
// with some other pattern matching.
if (!LegalTypes)
if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
switch (LHS.getOpcode()) {
default:
return SDValue();
case ISD::OR:
case ISD::XOR:
case ISD::AND:
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
break;
}
// We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
if (!BinOpCst)
return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases when we figure out that it is
// actually profitable.
SDValue BinOpLHSVal = LHS.getOperand(0);
bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
BinOpLHSVal.getOpcode() == ISD::SRA ||
BinOpLHSVal.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
BinOpLHSVal.getOpcode() == ISD::SELECT;
if (!IsShiftByConstant && !IsCopyOrSelect)
return SDValue();
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
// Fold the constants, shifting the binop RHS by the shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
N->getOperand(1));
assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
N->getOperand(1));
return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
assert(N->getOpcode() == ISD::TRUNCATE);
assert(N->getOperand(0).getOpcode() == ISD::AND);
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
EVT TruncVT = N->getValueType(0);
if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
SDLoc DL(N);
SDValue N00 = N->getOperand(0).getOperand(0);
SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
AddToWorklist(Trunc00.getNode());
AddToWorklist(Trunc01.getNode());
return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
}
}
return SDValue();
}
SDValue DAGCombiner::visitRotate(SDNode *N) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
unsigned Bitsize = VT.getScalarSizeInBits();
// fold (rot x, 0) -> x
if (isNullOrNullSplat(N1))
return N0;
// fold (rot x, c) -> x iff (c % BitSize) == 0
if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
if (DAG.MaskedValueIsZero(N1, ModuloMask))
return N0;
}
// fold (rot x, c) -> (rot x, c % BitSize)
bool OutOfRange = false;
auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
OutOfRange |= C->getAPIntValue().uge(Bitsize);
return true;
};
if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
EVT AmtVT = N1.getValueType();
SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
if (SDValue Amt =
DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
}
// rot i16 X, 8 --> bswap X
auto *RotAmtC = isConstOrConstSplat(N1);
if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
return DAG.getNode(ISD::BSWAP, dl, VT, N0);
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
}
unsigned NextOp = N0.getOpcode();
// fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
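// For illustration, on i32: (rotl (rotl x, 5), 10) -> (rotl x, 15) and
// (rotl (rotr x, 5), 10) -> (rotl x, 5).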
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
CombinedShiftNorm);
}
}
}
return SDValue();
}
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
if (SDValue C =
DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
}
}
}
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
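// For illustration, on i32: (shl (shl x, 8), 16) -> (shl x, 24), while
// (shl (shl x, 24), 16) -> 0 because 24 + 16 >= 32.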
if (N0.getOpcode() == ISD::SHL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
// fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
SDValue InnerShiftAmt = N0Op0.getOperand(1);
EVT InnerVT = N0Op0.getValueType();
uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return c2.uge(OpSizeInBits - InnerBitwidth) &&
(c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return c2.uge(OpSizeInBits - InnerBitwidth) &&
(c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
}
}
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
SDValue InnerShiftAmt = N0Op0.getOperand(1);
auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2);
return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
};
if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
SDLoc DL(N);
EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
// TODO - support non-uniform vector shift amounts.
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t C1 = N0C1->getZExtValue();
uint64_t C2 = N1C->getZExtValue();
SDLoc DL(N);
if (C1 <= C2)
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(C2 - C1, DL, ShiftVT));
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
DAG.getConstant(C1 - C2, DL, ShiftVT));
}
}
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
//                               (and (srl x, (sub c1, c2)), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
// TODO - drop hasOneUse requirement if c1 == c2?
// TODO - support non-uniform vector shift amounts.
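// For illustration, on i8: (shl (srl x, 4), 2) -> (and (srl x, 2), 0x3C).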
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
if (c2 > c1) {
Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(c2 - c1, DL, ShiftVT));
} else {
Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
DAG.getConstant(c1 - c2, DL, ShiftVT));
}
SDLoc DL(N0);
return DAG.getNode(ISD::AND, DL, VT, Shift,
DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
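// For illustration, on i32: (shl (sra x, 24), 24) -> (and x, 0xFF000000).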
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
SDLoc DL(N);
SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
AddToWorklist(Shl1.getNode());
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
if (isConstantOrConstantVector(Shl))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
if (N1C && !N1C->isOpaque())
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
// Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
APInt ShlVal;
if (N0.getOpcode() == ISD::STEP_VECTOR)
if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
if (ShlVal.ult(C0.getBitWidth())) {
APInt NewStep = C0 << ShlVal;
return DAG.getStepVector(SDLoc(N), VT, NewStep);
}
}
return SDValue();
}
// Transform a right shift of a multiply into a multiply-high.
// Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
"SRL or SRA node is required here!");
// Check the shift amount. Proceed with the transformation if the shift
// amount is constant.
ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
if (!ShiftAmtSrc)
return SDValue();
SDLoc DL(N);
// The operation feeding into the shift must be a multiply.
SDValue ShiftOperand = N->getOperand(0);
if (ShiftOperand.getOpcode() != ISD::MUL)
return SDValue();
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
return SDValue();
EVT WideVT1 = LeftOp.getValueType();
EVT WideVT2 = RightOp.getValueType();
(void)WideVT2;
// Proceed with the transformation if the wide types match.
assert((WideVT1 == WideVT2) &&
"Cannot have a multiply node with two different operand types.");
EVT NarrowVT = LeftOp.getOperand(0).getValueType();
// Check that the two extend nodes are the same type.
if (NarrowVT != RightOp.getOperand(0).getValueType())
return SDValue();
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
// Proceed with the transformation if the shift amount is the width
// of the narrow type.
unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
if (ShiftAmt != NarrowVTSize)
return SDValue();
// If the operation feeding into the MUL is a sign extend (sext),
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
// Combine to mulh if mulh is legal/custom for the narrow type on the target.
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
RightOp.getOperand(0));
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
: DAG.getZExtOrTrunc(Result, DL, WideVT1));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// Arithmetic shifting an all-sign-bit value is a no-op.
// fold (sra 0, x) -> 0
// fold (sra -1, x) -> -1
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, if the target supports
// sext_inreg.
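// For illustration, on i32: (sra (shl x, 24), 24) -> (sign_extend_inreg x, i8),
// i.e. the low 8 bits of x sign-extended to 32 bits.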
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
VT.getVectorElementCount());
if (!LegalOperations ||
TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
// Even if we can't convert to sext_inreg, we might be able to remove
// this shift pair if the input is already sign extended.
if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
return N0.getOperand(0);
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
// clamp (add c1, c2) to max shift.
if (N0.getOpcode() == ISD::SRA) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
EVT ShiftSVT = ShiftVT.getScalarType();
SmallVector<SDValue, 16> ShiftValues;
auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
APInt Sum = c1 + c2;
unsigned ShiftSum =
Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
return true;
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
SDValue ShiftValue;
if (N1.getOpcode() == ISD::BUILD_VECTOR)
ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(ShiftValues.size() == 1 &&
"Expected matchBinaryPredicate to return one element for "
"SPLAT_VECTORs");
ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
} else
ShiftValue = ShiftValues[0];
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
}
}
// fold (sra (shl X, m), (sub result_size, n))
// -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
// result_size - n != m.
// If truncate is free for the target, sext(shl) is likely to result in better
// code.
if (N0.getOpcode() == ISD::SHL && N1C) {
// Get the two constants of the shifts, CN0 = m, CN = n.
const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
if (N01C) {
LLVMContext &Ctx = *DAG.getContext();
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
if (VT.isVector())
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// Determine the residual right-shift amount.
int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// If the shift is not a no-op (in which case this should be just a sign
// extend already), the type we truncate to is legal, sign_extend is legal
// on that type, and the truncate to that type is both legal and free,
// perform the transform.
if ((ShiftAmt > 0) &&
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Amt = DAG.getConstant(ShiftAmt, DL,
getShiftAmountTy(N0.getOperand(0).getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
N0.getOperand(0), Amt);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
Shift);
return DAG.getNode(ISD::SIGN_EXTEND, DL,
N->getValueType(0), Trunc);
}
}
}
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
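// Illustrative example (assuming i32 values and N1C = 16):
// (sra (add (shl X, 16), 0x30000), 16) --> (sext (add (trunc X to i16), 3)),
// where AddC' = (0x30000 >> 16) truncated to i16 = 3.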
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
N0.getOperand(0).getOpcode() == ISD::SHL &&
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
SDValue Shl = N0.getOperand(0);
// Determine what the truncate's type would be and ask the target if that
// is a free operation.
LLVMContext &Ctx = *DAG.getContext();
unsigned ShiftAmt = N1C->getZExtValue();
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
if (VT.isVector())
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
// TODO: The simple type check probably belongs in the default hook
// implementation and/or target-specific overrides (because
// non-simple types likely require masking when legalized), but that
// restriction may conflict with other transforms.
if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
return DAG.getSExtOrTrunc(Add, DL, VT);
}
}
}
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
// fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
// TODO - support non-uniform vector shift amounts.
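// Illustrative example (assuming x : i64 truncated to i32, so c1 = 32):
// (sra (trunc (srl x, 32)), 5) --> (trunc (sra x, 37)).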
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
EVT LargeVT = N0Op0.getValueType();
unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
getShiftAmountTy(LargeVT));
SDValue SRA =
DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
// Simplify, based on bits shifted out of the LHS.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = DAG.simplifyShift(N0, N1))
return V;
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
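// Illustrative example (assuming i32 values): (srl (srl x, 10), 5) becomes
// (srl x, 15), while (srl (srl x, 20), 20) folds to the constant 0.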
if (N0.getOpcode() == ISD::SRL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
APInt c1 = LHS->getAPIntValue();
APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
}
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue InnerShift = N0.getOperand(0);
// TODO - support non-uniform vector shift amounts.
if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
uint64_t c1 = N001C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = InnerShift.getValueType();
EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
// srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if OpSizeInBits + c1 == the size of the inner shift.
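// Illustrative example (assuming x : i64 truncated to i32, so c1 = 32):
// (srl (trunc (srl x, 32)), 8) --> (trunc (srl x, 40)).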
if (c1 + OpSizeInBits == InnerShiftSize) {
SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
InnerShift.getOperand(0), NewShiftAmt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
}
// In the more general case, we can clear the high bits after the shift:
// srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
if (N0.hasOneUse() && InnerShift.hasOneUse() &&
c1 + c2 < InnerShiftSize) {
SDLoc DL(N);
SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
InnerShift.getOperand(0), NewShiftAmt);
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
OpSizeInBits - c2),
DL, InnerShiftVT);
SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
}
}
}
// fold (srl (shl x, c), c) -> (and x, cst2)
// TODO - (srl (shl x, c1), c2).
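// Illustrative example (assuming i32 values): (srl (shl x, 24), 24)
// becomes (and x, 255).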
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
SDValue Mask =
DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
// TODO - support non-uniform vector shift amounts.
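// Illustrative example (assuming x : i16 any-extended to i32):
// (srl (any_extend x), 4) --> (and (any_extend (srl x, 4)), 0x0FFFFFFF).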
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
unsigned BitSize = SmallVT.getScalarSizeInBits();
if (N1C->getAPIntValue().uge(BitSize))
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
SDLoc DL0(N0);
SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
DAG.getConstant(ShiftAmt, DL0,
getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
DAG.getConstant(Mask, DL, VT));
}
}
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
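// Illustrative example (assuming i32 values): if only bit 3 of x can be
// nonzero, then (ctlz x) is 28 when that bit is set and 32 when it is clear,
// so (srl (ctlz x), 5) is equivalent to (xor (srl x, 3), 1).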
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~Known.Zero;
if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
if (UnknownBits.isPowerOf2()) {
// Okay, we know that only the single bit specified by UnknownBits could
// be set on input to the CTLZ node. If this bit is set, the SRL will
// return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
// SRL/XOR pair, which is likely to simplify more.
unsigned ShAmt = UnknownBits.countTrailingZeros();
SDValue Op = N0.getOperand(0);
if (ShAmt) {
SDLoc DL(N0);
Op = DAG.getNode(ISD::SRL, DL, VT, Op,
DAG.getConstant(ShAmt, DL,
getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
SDLoc DL(N);
return DAG.getNode(ISD::XOR, DL, VT,
Op, DAG.getConstant(1, DL, VT));
}
}
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
if (SDValue NewSRL = visitShiftByConstant(N))
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// Here is a common situation. We want to optimize:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// However, after the source operand of the SRL has been optimized into an AND,
// the SRL itself may not be optimized further. Look for the BRCOND user and
// add it to the worklist.
if (N->hasOneUse()) {
SDNode *Use = *N->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorklist(Use);
else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
// Also look past the truncate.
Use = *Use->use_begin();
if (Use->getOpcode() == ISD::BRCOND)
AddToWorklist(Use);
}
}
// Try to transform this shift into a multiply-high if
// it matches the appropriate pattern detected in combineShiftToMULH.
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
return SDValue();
}
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
bool IsFSHL = N->getOpcode() == ISD::FSHL;
unsigned BitWidth = VT.getScalarSizeInBits();
// fold (fshl N0, N1, 0) -> N0
// fold (fshr N0, N1, 0) -> N1
if (isPowerOf2_32(BitWidth))
if (DAG.MaskedValueIsZero(
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
auto IsUndefOrZero = [](SDValue V) {
return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
};
// TODO - support non-uniform vector shift amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
EVT ShAmtTy = N2.getValueType();
// fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
unsigned ShAmt = Cst->getZExtValue();
if (ShAmt == 0)
return IsFSHL ? N0 : N1;
// fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
// fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
// fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
// fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
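// Illustrative example (assuming i32 values): fshl(0, N1, 8) --> srl(N1, 24)
// and fshr(N0, 0, 8) --> shl(N0, 24).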
if (IsUndefOrZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
SDLoc(N), ShAmtTy));
if (IsUndefOrZero(N1))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
SDLoc(N), ShAmtTy));
// fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// TODO - bigendian support once we have test coverage.
// TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
// TODO - permit LHS EXTLOAD if extensions are shifted out.
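// Illustrative example (assuming little-endian i64 loads, ld0 at address A
// and ld1 at A + 8): (fshl ld1, ld0, 16) selects bytes A+6 .. A+13, so it
// can be replaced by a single i64 load from A + 6 (PtrOff = (64 - 16) / 8).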
if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
!DAG.getDataLayout().isBigEndian()) {
auto *LHS = dyn_cast<LoadSDNode>(N0);
auto *RHS = dyn_cast<LoadSDNode>(N1);
if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
LHS->getAddressSpace() == RHS->getAddressSpace() &&
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
ISD::isNON_EXTLoad(LHS)) {
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
SDLoc DL(RHS);
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
bool Fast = false;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
SDValue NewPtr = DAG.getMemBasePlusOffset(
RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
return Load;
}
}
}
}
}
// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
// fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
// iff we know the shift amount is in range.
// TODO: when is it worth doing SUB(BW, N2) as well?
if (isPowerOf2_32(BitWidth)) {
APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
// fold (fshr N0, N0, N2) -> (rotr N0, N2)
// TODO: Investigate flipping this rotate if only one is legal; if funnel shift
// is legal as well, we might be better off avoiding the non-constant (BW - N2).
unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
if (N0 == N1 && hasOperation(RotOpc, VT))
return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
// Simplify, based on bits shifted out of N0/N1.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
// Given an ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generate a UABD/SABD instruction.
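// Illustrative example (assuming a, b : i8 zero-extended to i32):
// (abs (sub (zext a), (zext b))) --> (zext (abdu a, b) to i32).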
static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
if (AbsOp1.getOpcode() != ISD::SUB)
return SDValue();
Op0 = AbsOp1.getOperand(0);
Op1 = AbsOp1.getOperand(1);
unsigned Opc0 = Op0.getOpcode();
// Check if the operands of the sub are (zero|sign)-extended.
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
return SDValue();
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
// Check if the operands are of same type and valid size.
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
return SDValue();
Op0 = Op0.getOperand(0);
Op1 = Op1.getOperand(0);
SDValue ABD =
DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
}
SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (abs c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
// fold (abs (abs x)) -> (abs x)
if (N0.getOpcode() == ISD::ABS)
return N0;
// fold (abs x) -> x iff not-negative
if (DAG.SignBitIsZero(N0))
return N0;
if (SDValue ABD = combineABSToABD(N, DAG, TLI))
return ABD;
return SDValue();
}
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (bswap c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
return N0->getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (bitreverse c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
// If the value is known never to be zero, switch to the undef version.
if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
if (DAG.isKnownNeverZero(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
}
return SDValue();
}
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
// If the value is known never to be zero, switch to the undef version.
if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
if (DAG.isKnownNeverZero(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
}
return SDValue();
}
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
SDValue RHS,
const TargetLowering &TLI) {
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
ISD::CondCode CC, const TargetLowering &TLI,
SelectionDAG &DAG) {
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
switch (CC) {
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETLT:
case ISD::SETLE:
case ISD::SETULT:
case ISD::SETULE: {
// Since the operands are already known never to be NaN at this point,
// either fminnum or fminnum_ieee is OK. Try the IEEE version first, since
// fminnum is expanded in terms of it.
unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETUGT:
case ISD::SETUGE: {
unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
return SDValue();
}
default:
return SDValue();
}
}
/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue C1 = N->getOperand(1);
SDValue C2 = N->getOperand(2);
if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
return SDValue();
EVT VT = N->getValueType(0);
if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
VT != Cond.getOperand(0).getValueType())
return SDValue();
// The inverted-condition + commuted-select variants of these patterns are
// canonicalized to these forms in IR.
SDValue X = Cond.getOperand(0);
SDValue CondC = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
isAllOnesOrAllOnesSplat(C2)) {
// i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
}
if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
// i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
SDLoc DL(N);
SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
}
return SDValue();
}
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
if (!VT.isInteger())
return SDValue();
auto *C1 = dyn_cast<ConstantSDNode>(N1);
auto *C2 = dyn_cast<ConstantSDNode>(N2);
if (!C1 || !C2)
return SDValue();
// Only do this before legalization to avoid conflicting with target-specific
// transforms in the other direction (create a select from a zext/sext). There
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
if (C1->isNullValue() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
if (C1->isNullValue() && C2->isAllOnesValue()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
if (C1->isOne() && C2->isNullValue()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
if (C1->isAllOnesValue() && C2->isNullValue()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return Cond;
}
// Use a target hook because some targets may prefer to transform in the
// other direction.
if (TLI.convertSelectOfConstantsToMath(VT)) {
// For any constants that differ by 1, we can transform the select into an
// extend and add.
const APInt &C1Val = C1->getAPIntValue();
const APInt &C2Val = C2->getAPIntValue();
if (C1Val - 1 == C2Val) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
if (C1Val + 1 == C2Val) {
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
}
return SDValue();
}
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer-based booleans have different contents
// from floating-point-based booleans. This is because we can't tell whether we
// have an integer-based boolean or a floating-point-based boolean unless we
// can find the SETCC that produced it and inspect its operands. This is
// fairly easy if Cond is the SETCC node itself, but it can potentially be
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
if (CondVT.isInteger() &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
C1->isNullValue() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
}
return SDValue();
}
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
"Expected a (v)select");
SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0);
if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue();
// select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F
if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
// select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T
if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
// select Cond, T, 1 --> or (not Cond), T
if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
}
// select Cond, 0, F --> and (not Cond), F
if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
}
return SDValue();
}
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
SDLoc DL(N);
SDNodeFlags Flags = N->getFlags();
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
if (SDValue V = foldSelectOfConstants(N))
return V;
if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
if (VT0 == MVT::i1) {
// The code in this block deals with the following 2 equivalences:
// select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
// The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However, we always transform
// to the right-hand form if the inner select already exists in the DAG,
// and we always transform to the left-hand form if we know that we can
// further optimize the combination of the conditions.
bool normalizeToSequence =
TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect =
DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
InnerSelect, N2, Flags);
// Cleanup on failure.
if (InnerSelect.use_empty())
recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
InnerSelect, Flags);
// Cleanup on failure.
if (InnerSelect.use_empty())
recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
N2, Flags);
}
// Otherwise see if we can optimize the "and" to a better pattern.
if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
N2, Flags);
}
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
N2_2, Flags);
}
}
}
// select (not Cond), N1, N2 -> select Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
SelectOp->setFlags(Flags);
return SelectOp;
}
// Fold selects based on a setcc into other things, such as min/max/abs.
if (N0.getOpcode() == ISD::SETCC) {
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
// select (fcmp lt x, y), x, y -> fminnum x, y
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
CC, TLI, DAG))
return FMinMax;
// Use 'unsigned add with overflow' to optimize an unsigned saturating add.
// This is conservatively limited to pre-legal-operations to give targets
// a chance to reverse the transform if they want to do that. Also, it is
// unlikely that the pattern would be formed late, so it's probably not
// worth going through the other checks.
if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
// select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
// uaddo Cond0, C; select uaddo.1, -1, uaddo.0
//
// The IR equivalent of this transform would have this form:
// %a = add %x, C
// %c = icmp ugt %x, ~C
// %r = select %c, -1, %a
// =>
// %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
// %u0 = extractvalue %u, 0
// %u1 = extractvalue %u, 1
// %r = select %u1, -1, %u0
SDVTList VTs = DAG.getVTList(VT, VT0);
SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
}
}
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
(!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
// Any flags available in a select/setcc fold will be on the setcc as they
// migrated from fcmp
Flags = N0.getNode()->getFlags();
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
N2, N0.getOperand(2));
SelectNode->setFlags(Flags);
return SelectNode;
}
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
return NewSel;
}
if (!VT.isVector())
if (SDValue BinOp = foldSelectOfBinops(N))
return BinOp;
return SDValue();
}
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = N->getValueType(0);
int NumElems = VT.getVectorNumElements();
assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
RHS.getOpcode() == ISD::CONCAT_VECTORS &&
Cond.getOpcode() == ISD::BUILD_VECTOR);
// CONCAT_VECTORS can take an arbitrary number of arguments. We only care
// about binary ones here.
if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
return SDValue();
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Skip BV elements until we find one that's not an UNDEF. After we find a
// non-UNDEF element, keep looping until we get to half the length of the
// BV and check that all the non-undef nodes are the same.
ConstantSDNode *BottomHalf = nullptr;
for (int i = 0; i < NumElems / 2; ++i) {
if (Cond->getOperand(i)->isUndef())
continue;
if (BottomHalf == nullptr)
BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
else if (Cond->getOperand(i).getNode() != BottomHalf)
return SDValue();
}
// Do the same for the second half of the BuildVector
ConstantSDNode *TopHalf = nullptr;
for (int i = NumElems / 2; i < NumElems; ++i) {
if (Cond->getOperand(i)->isUndef())
continue;
if (TopHalf == nullptr)
TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
else if (Cond->getOperand(i).getNode() != TopHalf)
return SDValue();
}
assert(TopHalf && BottomHalf &&
"One half of the selector was all UNDEFs and the other was all the "
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
return false;
// For now we check only the LHS of the add.
SDValue LHS = Index.getOperand(0);
SDValue SplatVal = DAG.getSplatValue(LHS);
if (!SplatVal)
return false;
BasePtr = SplatVal;
Index = Index.getOperand(1);
return true;
}
// Fold sext/zext of index into index type.
bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
bool Scaled, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
SDValue Op = Index.getOperand(0);
MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
Index = Op;
return true;
}
}
if (Index.getOpcode() == ISD::SIGN_EXTEND) {
SDValue Op = Index.getOperand(0);
MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
Index = Op;
return true;
}
}
return false;
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
SDValue StoreVal = MSC->getValue();
SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(
DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
}
return SDValue();
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Chain = MST->getChain();
SDLoc DL(N);
// Zap masked stores with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
// If this is a masked store with an all-ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MST->isUnindexed() && !MST->isCompressingStore() &&
!MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDValue Chain = MGT->getChain();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
SDValue PassThru = MGT->getPassThru();
SDValue BasePtr = MGT->getBasePtr();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
if (refineUniformBase(BasePtr, Index, DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
MGT->getMemoryVT(), DL, Ops,
MGT->getMemOperand(), MGT->getIndexType(),
MGT->getExtensionType());
}
return SDValue();
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
SDValue Mask = MLD->getMask();
SDLoc DL(N);
// Zap masked loads with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
// If this is a masked load with an all-ones mask, we can use an unmasked load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
MLD->isUnindexed() && !MLD->isExpandingLoad() &&
MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
return CombineTo(N, NewLd, NewLd.getValue(1));
}
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
return SDValue();
}
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
!TLI.convertSelectOfConstantsToMath(VT) ||
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
return SDValue();
// Check if we can use the condition value to increment/decrement a single
// constant value. This simplifies a select to an add and removes a constant
// load/materialization from the general case.
bool AllAddOne = true;
bool AllSubOne = true;
unsigned Elts = VT.getVectorNumElements();
for (unsigned i = 0; i != Elts; ++i) {
SDValue N1Elt = N1.getOperand(i);
SDValue N2Elt = N2.getOperand(i);
if (N1Elt.isUndef() || N2Elt.isUndef())
continue;
if (N1Elt.getValueType() != N2Elt.getValueType())
continue;
const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
if (C1 != C2 + 1)
AllAddOne = false;
if (C1 != C2 - 1)
AllSubOne = false;
}
// Further simplifications for the extra-special cases where the constants are
// all 0 or all -1 should be implemented as folds of these patterns.
SDLoc DL(N);
if (AllAddOne || AllSubOne) {
// vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
// vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
// select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
APInt Pow2C;
if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
isNullOrNullSplat(N2)) {
SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
}
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
return V;
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
// leave that to a machine-specific pass.
return SDValue();
}
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
if (SDValue V = foldBoolSelectToLogic(N, DAG))
return V;
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
// vselect (setl[te] X, 0), -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
if (N0.getOpcode() == ISD::SETCC) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
bool isAbs = false;
bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
DL, getShiftAmountTy(VT)));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
// vselect x, y (fcmp lt x, y) -> fminnum x, y
// vselect x, y (fcmp gt x, y) -> fmaxnum x, y
//
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
if (SDValue FMinMax =
combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
return FMinMax;
}
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
// TODO: This could be extended to handle non-loading patterns, but that
// requires thorough testing to avoid regressions.
if (isNullOrNullSplat(RHS)) {
EVT NarrowVT = LHS.getValueType();
EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
EVT SetCCVT = getSetCCResultType(LHS.getValueType());
unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
unsigned WideWidth = WideVT.getScalarSizeInBits();
bool IsSigned = isSignedIntSetCC(CC);
auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
SetCCWidth != 1 && SetCCWidth < WideWidth &&
TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
// Both compare operands can be widened for free. The LHS can use an
// extended load, and the RHS is a constant:
// vselect (ext (setcc load(X), C)), N1, N2 -->
// vselect (setcc extload(X), C'), N1, N2
auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
EVT WideSetCCVT = getSetCCResultType(WideVT);
SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
}
}
// Match VSELECTs into add with unsigned saturation.
if (hasOperation(ISD::UADDSAT, VT)) {
// Check if one of the arms of the VSELECT is a vector with all bits set.
// If it's on the left side, invert the predicate to simplify the logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
Other = N1;
}
if (Other && Other.getOpcode() == ISD::ADD) {
SDValue CondLHS = LHS, CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
// Canonicalize condition operands.
if (SatCC == ISD::SETUGE) {
std::swap(CondLHS, CondRHS);
SatCC = ISD::SETULE;
}
// We can test against either of the addition operands.
// x <= x+y ? x+y : ~0 --> uaddsat x, y
// x+y >= x ? x+y : ~0 --> uaddsat x, y
if (SatCC == ISD::SETULE && Other == CondRHS &&
(OpLHS == CondLHS || OpRHS == CondLHS))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
(OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
CondLHS == OpLHS) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x >= ~C ? x+C : ~0 --> uaddsat x, C
auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == ~Op->getAPIntValue();
};
if (SatCC == ISD::SETULE &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
}
}
}
// Match VSELECTs into sub with unsigned saturation.
if (hasOperation(ISD::USUBSAT, VT)) {
// Check if one of the arms of the VSELECT is a zero vector. If it's on
// the left side, invert the predicate to simplify the logic below.
SDValue Other;
ISD::CondCode SatCC = CC;
if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
Other = N2;
SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
} else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
Other = N1;
}
if (Other && Other.getNumOperands() == 2) {
SDValue CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
if (Other.getOpcode() == ISD::SUB &&
LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
// Look for a general sub with unsigned saturation first.
// zext(x) >= y ? x - trunc(y) : 0
// --> usubsat(x,trunc(umin(y,SatLimit)))
// zext(x) > y ? x - trunc(y) : 0
// --> usubsat(x,trunc(umin(y,SatLimit)))
if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
DL);
}
if (OpLHS == LHS) {
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> usubsat x, y
// x > y ? x-y : 0 --> usubsat x, y
if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> usubsat x, C
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return (!Op && !Cond) ||
(Op && Cond &&
Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
};
if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
/*AllowUndefs*/ true)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
// FIXME: Would it be better to use computeKnownBits to determine
// whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> usubsat x, C
APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
SplatValue.isSignMask()) {
// Note that we have to rebuild the RHS constant here to
// ensure we don't rely on particular values of undef lanes.
OpRHS = DAG.getConstant(SplatValue, DL, VT);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
}
}
}
}
}
}
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
// Fold (vselect all_ones, N1, N2) -> N1
if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
return N1;
// Fold (vselect all_zeros, N1, N2) -> N2
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
return N2;
// The ConvertSelectToConcatVector function assumes both of the above
// checks for (vselect (build_vector all{ones,zeros}) ...) have already
// been made and addressed.
if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
N2.getOpcode() == ISD::CONCAT_VECTORS &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
return CV;
}
if (SDValue V = foldVSelectOfConstants(N))
return V;
return SDValue();
}
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
SDValue N4 = N->getOperand(4);
ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
// fold select_cc lhs, rhs, x, x, cc -> x
if (N2 == N3)
return N2;
// Determine if the condition we're dealing with is constant
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
CC, SDLoc(N), false)) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
if (!SCCC->isNullValue())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
} else if (SCC->isUndef()) {
// When the condition is UNDEF, just return the first operand. This is
// coherent with DAG creation: no setcc node is created in this case.
return N2;
} else if (SCC.getOpcode() == ISD::SETCC) {
// Fold to a simpler select_cc
SDValue SelectOp = DAG.getNode(
ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
SCC.getOperand(1), N2, N3, SCC.getOperand(2));
SelectOp->setFlags(SCC->getFlags());
return SelectOp;
}
}
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N2, N3))
return SDValue(N, 0); // Don't revisit N.
// fold select_cc into other things, such as min/max/abs
return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
// setcc is very commonly used as an argument to brcond. This pattern
// also lends itself to numerous combines and, as a result, it is desirable
// to keep the argument to a brcond as a setcc as much as possible.
bool PreferSetCC =
N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
// SETCC(FREEZE(X), CONST, Cond)
// =>
// FREEZE(SETCC(X, CONST, Cond))
// This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
// isn't equivalent to true or false.
// For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
// FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
//
// This transformation is beneficial because visitBRCOND can fold
// BRCOND(FREEZE(X)) to BRCOND(X).
// Conservatively optimize integer comparisons only.
if (PreferSetCC) {
// Do this only when SETCC is going to be used by BRCOND.
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
bool Updated = false;
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
(Cond == ISD::SETUGT && C->isAllOnesValue()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
(Cond == ISD::SETUGE && C->isNullValue()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
N0 = N0->getOperand(0);
Updated = true;
}
}
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
N0C)) {
N1 = N1->getOperand(0);
Updated = true;
}
}
if (Updated)
return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
}
SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
SDLoc(N), !PreferSetCC);
if (!Combined)
return SDValue();
// If we prefer to have a setcc and we don't have one, we'll try our best to
// recreate one using rebuildSetCC.
if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
SDValue NewSetCC = rebuildSetCC(Combined);
// We don't have anything interesting to combine to.
if (NewSetCC.getNode() == N)
return SDValue();
if (NewSetCC)
return NewSetCC;
}
return Combined;
}
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDValue Cond = N->getOperand(3);
// If Carry is false, fold to a regular SETCC.
if (isNullConstant(Carry))
return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
return SDValue();
}
/// Check if N satisfies:
/// N is used once.
/// N is a Load.
/// The load is compatible with ExtOpcode. This means that if the load has
/// an explicit zero/sign extension, ExtOpcode must have the same kind of
/// extension; otherwise any ExtOpcode is compatible.
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
if (!N.hasOneUse())
return false;
if (!isa<LoadSDNode>(N))
return false;
LoadSDNode *Load = cast<LoadSDNode>(N);
ISD::LoadExtType LoadExt = Load->getExtensionType();
if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
return true;
// Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
// extension.
if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
(LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
return false;
return true;
}
/// Fold
/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
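///
/// For example, assuming the target can legally sign-extend an i16 load to
/// i32, (i32 (sext (select cond, (i16 (load x)), (i16 (load y))))) becomes
/// (i32 (select cond, (i32 (sextload x)), (i32 (sextload y)))).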
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND) &&
"Expected EXTEND dag node in input!");
if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
!N0.hasOneUse())
return SDValue();
SDValue Op1 = N0->getOperand(1);
SDValue Op2 = N0->getOperand(2);
if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
return SDValue();
auto ExtLoadOpcode = ISD::EXTLOAD;
if (Opcode == ISD::SIGN_EXTEND)
ExtLoadOpcode = ISD::SEXTLOAD;
else if (Opcode == ISD::ZERO_EXTEND)
ExtLoadOpcode = ISD::ZEXTLOAD;
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
return SDValue();
SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
}
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
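///
/// For example, (i64 (zext (i32 Constant<7>))) is folded to the constant
/// (i64 7), and (v4i32 (sext (v4i16 build_vector of constants))) is folded
/// to a v4i32 build_vector of the sign-extended constants (assuming the i32
/// scalar type is legal when LegalTypes is set).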
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SelectionDAG &DAG, bool LegalTypes) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
&& "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
return DAG.getNode(Opcode, DL, VT, N0);
// fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
// fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
// fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
if (N0->getOpcode() == ISD::SELECT) {
SDValue Op1 = N0->getOperand(1);
SDValue Op2 = N0->getOperand(2);
if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
(Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
// For any_extend, choose sign extension of the constants to allow a
// possible further transform to sign_extend_inreg, i.e.
//
// t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
// t2: i64 = any_extend t1
// -->
// t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
// -->
// t4: i64 = sign_extend_inreg t3
unsigned FoldOpc = Opcode;
if (FoldOpc == ISD::ANY_EXTEND)
FoldOpc = ISD::SIGN_EXTEND;
return DAG.getSelect(DL, VT, N0->getOperand(0),
DAG.getNode(FoldOpc, DL, VT, Op1),
DAG.getNode(FoldOpc, DL, VT, Op2));
}
}
// fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
// fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
EVT SVT = VT.getScalarType();
if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
return SDValue();
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
// For zero-extensions, UNDEF elements are still guaranteed to have their
// upper bits set to zero.
bool IsZext =
Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Op = N0.getOperand(i);
if (Op.isUndef()) {
Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
continue;
}
SDLoc DL(Op);
// Get the constant value and if needed trunc it to the size of the type.
// Nodes like build_vector might have constants wider than the scalar type.
APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
else
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
return DAG.getBuildVector(VT, DL, Elts);
}
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
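// For example, if (load x) is also used by (setcc (load x), 42, seteq), that
// setcc can itself be rewritten to compare the extended load against the
// extended constant, so extending the load does not force the narrow value
// to stay live.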
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
unsigned ExtOpc,
SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
UE = N0.getNode()->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
if (UI.getUse().getResNo() != N0.getResNo())
continue;
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
// Sign bits will be lost after a zext.
return false;
bool Add = false;
for (unsigned i = 0; i != 2; ++i) {
SDValue UseOp = User->getOperand(i);
if (UseOp == N0)
continue;
if (!isa<ConstantSDNode>(UseOp))
return false;
Add = true;
}
if (Add)
ExtendNodes.push_back(User);
continue;
}
// If truncates aren't free and there are users we can't
// extend, it isn't worthwhile.
if (!isTruncFree)
return false;
// Remember if this value is live-out.
if (User->getOpcode() == ISD::CopyToReg)
HasCopyToRegUses = true;
}
if (HasCopyToRegUses) {
bool BothLiveOut = false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
BothLiveOut = true;
break;
}
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
// a good reason for the transformation.
return ExtendNodes.size();
}
return true;
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue OrigLoad, SDValue ExtLoad,
ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
SDLoc DL(ExtLoad);
for (SDNode *SetCC : SetCCs) {
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
if (SOp == OrigLoad)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
}
Ops.push_back(SetCC->getOperand(2));
CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
}
}
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT DstVT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND) &&
"Unexpected node type (not an extend)!");
// fold (sext (load x)) to multiple smaller sextloads; same for zext.
// For example, on a target with legal v4i32, but illegal v8i32, turn:
// (v8i32 (sext (v8i16 (load x))))
// into:
// (v8i32 (concat_vectors (v4i32 (sextload x)),
// (v4i32 (sextload (x + 16)))))
// Where uses of the original load, i.e.:
// (v8i16 (load x))
// are replaced with:
// (v8i16 (truncate
// (v8i32 (concat_vectors (v4i32 (sextload x)),
// (v4i32 (sextload (x + 16)))))))
//
// This combine is only applicable to illegal, but splittable, vectors.
// All legal types, and illegal non-vector types, are handled elsewhere.
// This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
//
if (N0->getOpcode() != ISD::LOAD)
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
!N0.hasOneUse() || !LN0->isSimple() ||
!DstVT.isVector() || !DstVT.isPow2VectorType() ||
!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
return SDValue();
ISD::LoadExtType ExtType =
N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
// Try to split the vector types to get down to legal types.
EVT SplitSrcVT = SrcVT;
EVT SplitDstVT = DstVT;
while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
SplitSrcVT.getVectorNumElements() > 1) {
SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
}
if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
return SDValue();
assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
SDLoc DL(N);
const unsigned NumSplits =
DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
const unsigned Stride = SplitSrcVT.getStoreSize();
SmallVector<SDValue, 4> Loads;
SmallVector<SDValue, 4> Chains;
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
const Align Align = commonAlignment(LN0->getAlign(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
// Simplify TF.
AddToWorklist(NewChain.getNode());
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
// with a truncate of the concatenated sextloaded vectors.
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
CombineTo(N0.getNode(), Trunc, NewChain);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
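// For example, assuming an i8 -> i32 zextload is legal on the target,
//   (i32 (zext (and (srl (i8 (load x)), 1), 0x7f)))
// becomes
//   (i32 (and (srl (i32 (zextload x)), 1), 0x7f))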
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
assert(N->getOpcode() == ISD::ZERO_EXTEND);
EVT VT = N->getValueType(0);
EVT OrigVT = N->getOperand(0).getValueType();
if (TLI.isZExtFree(OrigVT, VT))
return SDValue();
// and/or/xor
SDValue N0 = N->getOperand(0);
if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) ||
N0.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
return SDValue();
// shl/shr
SDValue N1 = N0->getOperand(0);
if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
N1.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
return SDValue();
// load
if (!isa<LoadSDNode>(N1.getOperand(0)))
return SDValue();
LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
EVT MemVT = Load->getMemoryVT();
if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
return SDValue();
// If the shift op is SHL, the logic op must be AND, otherwise the result
// will be wrong.
if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SmallVector<SDNode*, 4> SetCCs;
if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
ISD::ZERO_EXTEND, SetCCs, TLI))
return SDValue();
// Actually do the transformation.
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
Load->getChain(), Load->getBasePtr(),
Load->getMemoryVT(), Load->getMemOperand());
SDLoc DL1(N1);
SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
N1.getOperand(1));
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL0(N0);
SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
DAG.getConstant(Mask, DL0, VT));
ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
CombineTo(N, And);
if (SDValue(Load, 0).hasOneUse()) {
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
Load->getValueType(0), ExtLoad);
CombineTo(Load, Trunc, ExtLoad.getValue(1));
}
// N0 is dead at this point.
recursivelyDeleteUnusedNodes(N0.getNode());
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
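///
/// For example, on a target where comparing v4i32 operands produces a v4i32
/// mask: (v4i32 (trunc (vselect (setcc v4i32 a, b, cc), v4i64 x, v4i64 y)))
/// becomes (v4i32 (vselect (setcc v4i32 a, b, cc), (trunc x), (trunc y))),
/// so the select condition and its other operands end up with matching sizes.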
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
unsigned CastOpcode = Cast->getOpcode();
assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
CastOpcode == ISD::FP_ROUND) &&
"Unexpected opcode for vector select narrowing/widening");
// We only do this transform before legal ops because the pattern may be
// obfuscated by target-specific operations after legalization. Do not create
// an illegal select op, however, because that may be difficult to lower.
EVT VT = Cast->getValueType(0);
if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
SDValue VSel = Cast->getOperand(0);
if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
VSel.getOperand(0).getOpcode() != ISD::SETCC)
return SDValue();
// Does the setcc have the same vector size as the casted select?
SDValue SetCC = VSel.getOperand(0);
EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
SDValue A = VSel.getOperand(1);
SDValue B = VSel.getOperand(2);
SDValue CastA, CastB;
SDLoc DL(Cast);
if (CastOpcode == ISD::FP_ROUND) {
// FP_ROUND (fptrunc) has an extra flag operand to pass along.
CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
} else {
CastA = DAG.getNode(CastOpcode, DL, VT, A);
CastB = DAG.getNode(CastOpcode, DL, VT, B);
}
return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}
// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
const TargetLowering &TLI, EVT VT,
bool LegalOperations, SDNode *N,
SDValue N0, ISD::LoadExtType ExtLoadType) {
SDNode *N0Node = N0.getNode();
bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
: ISD::isZEXTLoad(N0Node);
if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
!ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || !LN0->isSimple() ||
VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
SDValue ExtLoad =
DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
Combiner.CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
if (LN0->use_empty())
Combiner.recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
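// For example, (i32 (sext (i16 (load x)))) is rewritten to use
// (i32 (sextload x)) directly; any remaining users of the original i16 load
// are then fed by a truncate of the new extending load.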
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
const TargetLowering &TLI, EVT VT,
bool LegalOperations, SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
!cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
if (VT.isVector())
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (!DoXform)
return {};
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
Combiner.CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
Combiner.recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
const TargetLowering &TLI, EVT VT,
SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
if (!N0.hasOneUse())
return SDValue();
MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SDLoc dl(Ld);
SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
SDValue NewLoad = DAG.getMaskedLoad(
VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
ExtLoadType, Ld->isExpandingLoad());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
return NewLoad;
}
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
SDValue SetCC = N->getOperand(0);
if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
!SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
return SDValue();
SDValue X = SetCC.getOperand(0);
SDValue Ones = SetCC.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
EVT VT = N->getValueType(0);
EVT XVT = X.getValueType();
// setge X, C is canonicalized to setgt, so we do not need to match that
// pattern. The setlt sibling is folded in SimplifySelectCC() because it does
// not require the 'not' op.
if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
// Invert and smear/shift the sign bit:
// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
SDLoc DL(N);
unsigned ShCt = VT.getSizeInBits() - 1;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue NotX = DAG.getNOT(DL, X, VT);
SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
auto ShiftOpcode =
N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
}
}
return SDValue();
}
SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
EVT VT = N->getValueType(0);
EVT N00VT = N00.getValueType();
SDLoc DL(N);
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// the same size as the compared operands. Try to optimize sext(setcc())
// if this is the case.
if (VT.isVector() && !LegalOperations &&
TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
EVT SVT = getSetCCResultType(N00VT);
// If we already have the desired type, don't change it.
if (SVT != N0.getValueType()) {
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
// elements, we can use a matching integer vector type and then
// truncate/sign extend.
EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
if (SVT == MatchingVecType) {
SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
// Try to eliminate the sext of a setcc by zexting the compare operands.
if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
!TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// We have an unsupported narrow vector compare op that would be legal
// if extended to the destination type. See if the compare operands
// can be freely extended to the destination type.
auto IsFreeToExtend = [&](SDValue V) {
if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
return true;
// Match a simple, non-extended load that can be converted to a
// legal {z/s}ext-load.
// TODO: Allow widening of an existing {z/s}ext-load?
if (!(ISD::isNON_EXTLoad(V.getNode()) &&
ISD::isUNINDEXEDLoad(V.getNode()) &&
cast<LoadSDNode>(V)->isSimple() &&
TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
return false;
// Non-chain users of this value must either be the setcc in this
// sequence or extends that can be folded into the new {z/s}ext-load.
for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
// Skip uses of the chain and the setcc.
SDNode *User = *UI;
if (UI.getUse().getResNo() != 0 || User == N0.getNode())
continue;
// Extra users must have exactly the same cast we are about to create.
// TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
// is enhanced similarly.
if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
return false;
}
return true;
};
if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
}
}
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
SDValue ExtTrueVal = (SetCCWidth == 1)
? DAG.getAllOnesConstant(DL, VT)
: DAG.getBoolConstant(true, DL, VT, N00VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
// TODO: We should not do this transform at all without a target hook
// because a sext is likely cheaper than a select?
if (SetCCVT.getScalarSizeInBits() != 1 &&
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already fully sign-extended and can be returned directly.
if (NumSignBits > DestBits-MidBits)
return Op;
} else if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
N0.getValueType())) {
if (OpBits < DestBits)
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
// Try to simplify (sext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
if (SDValue foldedExt =
tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
ISD::SIGN_EXTEND))
return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
// Try to simplify (sext (sextload x)).
if (SDValue foldedExt = tryToFoldExtOfExtload(
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
return foldedExt;
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
SmallVector<SDNode*, 4> SetCCs;
bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
SDValue TruncAnd =
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
LN00->getValueType(0), ExtLoad);
CombineTo(LN00, Trunc, ExtLoad.getValue(1));
}
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
return V;
if (SDValue V = foldSextSetcc(N))
return V;
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
// Eliminate this sign extend by doing a negation in the destination type:
// sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isNullOrNullSplat(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
}
// Eliminate this sign extend by doing a decrement in the destination type:
// sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
// fold sext (not i1 X) -> add (zext i1 X), -1
// TODO: This could be extended to handle bool vectors.
if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
(!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
TLI.isOperationLegal(ISD::ADD, VT)))) {
// If we can eliminate the 'not', the sext form should be better
if (SDValue NewXor = visitXOR(N0.getNode())) {
// Returning N0 is a form of in-visit replacement that may have
// invalidated N0.
if (NewXor.getNode() == N0.getNode()) {
// Return SDValue here as the xor should have already been replaced in
// this sext.
return SDValue();
} else {
// Return a new sext with the new xor.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
}
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
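// Besides a plain TRUNCATE, this also recognizes (setcc X, 0, setne) where
// every bit of X other than bit 0 is known to be zero, which is equivalent
// to truncating X to i1.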
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
return true;
}
if (N.getOpcode() != ISD::SETCC ||
N.getValueType().getScalarType() != MVT::i1 ||
cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
return false;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
if (isNullOrNullSplat(Op0))
Op = Op1;
else if (isNullOrNullSplat(Op1))
Op = Op0;
else
return false;
Known = DAG.computeKnownBits(Op);
return (Known.Zero | 1).isAllOnesValue();
}
/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
SDValue CtPop = Extend->getOperand(0);
if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
return SDValue();
EVT VT = Extend->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
return SDValue();
// zext (ctpop X) --> ctpop (zext X)
SDLoc DL(Extend);
SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
SDValue Op;
KnownBits Known;
if (isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
(Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
APInt(Op.getScalarValueSizeInBits(), 0) :
APInt::getBitsSet(Op.getScalarValueSizeInBits(),
N0.getScalarValueSizeInBits(),
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
EVT SrcVT = N0.getOperand(0).getValueType();
EVT MinVT = N0.getValueType();
// Try to mask before the extension to avoid having to generate a larger mask,
// possibly over several sub-vectors.
if (SrcVT.bitsLT(VT) && VT.isVector()) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
AddToWorklist(Op.getNode());
SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
// Transfer the debug info; the new node is equivalent to N0.
DAG.transferDbgValues(N0, ZExtOrTrunc);
return ZExtOrTrunc;
}
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
return And;
}
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
// if either of the casts is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
// Try to simplify (zext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
if (SDValue foldedExt =
tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
ISD::ZERO_EXTEND))
return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
// additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
EVT MemVT = LN00->getMemoryVT();
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse()) {
if (N0.getOpcode() == ISD::AND) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
EVT LoadResultTy = AndC->getValueType(0);
EVT ExtVT;
if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
DoXform = false;
}
}
if (DoXform)
DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
SDValue TruncAnd =
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
LN00->getValueType(0), ExtLoad);
CombineTo(LN00, Trunc, ExtLoad.getValue(1));
}
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
return ZExtLoad;
// Try to simplify (zext (zextload x)).
if (SDValue foldedExt = tryToFoldExtOfExtload(
DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
return foldedExt;
if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
return V;
if (N0.getOpcode() == ISD::SETCC) {
// Only do this before legalize for now.
if (!LegalOperations && VT.isVector() &&
N0.getValueType().getVectorElementType() == MVT::i1) {
EVT N00VT = N0.getOperand(0).getValueType();
if (getSetCCResultType(N00VT) == N0.getValueType())
return SDValue();
// We know that the # elements of the results is the same as the #
// elements of the compare (and the # elements of the compare result for
// that matter). Check to see that they are the same size. If so, we know
// that the element size of the sext'd result matches the element size of
// the compare operands.
SDLoc DL(N);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
}
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend followed by zext_in_reg.
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
N0.getValueType());
}
// zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
SDLoc DL(N);
EVT N0VT = N0.getValueType();
EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1),
DAG.getBoolConstant(true, DL, N0VT, N00VT),
DAG.getBoolConstant(false, DL, N0VT, N00VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
InnerZExt.getOperand(0).getValueSizeInBits();
if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
return SDValue();
}
SDLoc DL(N);
// Ensure that the shift amount is wide enough for the shifted value.
if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
return DAG.getNode(N0.getOpcode(), DL, VT,
DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
ShAmt);
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
if (N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
AddToWorklist(oye);
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (truncate x))
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
if (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, DL, VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
// on vectors in one instruction, so attempt to fold to zext instead.
if (VT.isVector()) {
// Try to simplify (zext (load x)).
if (SDValue foldedExt =
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
DoXform =
ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
N0.getValueType(), LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
if (N0.getOpcode() == ISD::SETCC) {
// For vectors:
// aext(setcc) -> vsetcc
// aext(setcc) -> truncate(vsetcc)
// aext(setcc) -> aext(vsetcc)
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N00VT = N0.getOperand(0).getValueType();
if (getSetCCResultType(N00VT) == N0.getValueType())
return SDValue();
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N00VT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/any extend
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDLoc DL(N);
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return SCC;
}
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
return Res;
return SDValue();
}
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT AssertVT = cast<VTSDNode>(N1)->getVT();
// fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
if (N0.getOpcode() == Opcode &&
AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
return N0;
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == Opcode) {
// We have an assert, truncate, assert sandwich. Make one stronger assert
// by applying the smaller of the two asserted types to the larger source value.
// This eliminates the later assert:
// assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
// assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
"Asserting zero/sign-extended bits to a type larger than the "
"truncated destination does not provide information");
SDLoc DL(N);
EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
BigA.getOperand(0), MinAssertVTVal);
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
// If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
// than X, just move the AssertZext in front of the truncate and drop the
// AssertSext.
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::AssertSext &&
Opcode == ISD::AssertZext) {
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
"Asserting zero/sign-extended bits to a type larger than the "
"truncated destination does not provide information");
if (AssertVT.bitsLT(BigA_AssertVT)) {
SDLoc DL(N);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
BigA.getOperand(0), N1);
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
}
return SDValue();
}
SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
SDLoc DL(N);
Align AL = cast<AssertAlignSDNode>(N)->getAlign();
SDValue N0 = N->getOperand(0);
// Fold (assertalign (assertalign x, AL0), AL1) ->
// (assertalign x, max(AL0, AL1))
if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
return DAG.getAssertAlign(DL, N0.getOperand(0),
std::max(AL, AAN->getAlign()));
// In rare cases, there are trivial arithmetic ops in source operands. Sink
// this assert down to the source operands so that those arithmetic ops can
// be exposed to DAG combining.
switch (N0.getOpcode()) {
default:
break;
case ISD::ADD:
case ISD::SUB: {
unsigned AlignShift = Log2(AL);
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
if (LHSAlignShift < AlignShift)
LHS = DAG.getAssertAlign(DL, LHS, AL);
if (RHSAlignShift < AlignShift)
RHS = DAG.getAssertAlign(DL, RHS, AL);
return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
}
break;
}
}
return SDValue();
}
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it into a narrower load from the original
/// address plus an offset of N bits. Also narrow the load if the result is
/// masked with an AND that effectively produces a smaller type. If the result
/// is to be extended, also fold the extension to form an extending load.
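///
/// For example, on a little-endian target,
///   (i8 (trunc (srl (i32 (load x)), 16)))
/// can become a direct (i8 (load (x + 2))), and
///   (and (i32 (load x)), 0xffff)
/// can become an i16 -> i32 zero-extending load when that extload is legal.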
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT ExtVT = VT;
// This transformation isn't valid for vector loads.
if (VT.isVector())
return SDValue();
unsigned ShAmt = 0;
bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
// sign-extending back to VT.
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
// Another special case: SRL is basically zero-extending a narrower value,
// or it may be shifting a higher subword, half or byte into the lowest
// bits.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01 || !LN0)
return SDValue();
uint64_t ShiftAmt = N01->getZExtValue();
uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
else
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!AndC)
return SDValue();
const APInt &Mask = AndC->getAPIntValue();
unsigned ActiveBits = 0;
if (Mask.isMask()) {
ActiveBits = Mask.countTrailingOnes();
} else if (Mask.isShiftedMask()) {
ShAmt = Mask.countTrailingZeros();
APInt ShiftedMask = Mask.lshr(ShAmt);
ActiveBits = ShiftedMask.countTrailingOnes();
HasShiftedOffset = true;
} else
return SDValue();
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
}
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
ShAmt = ConstShift->getZExtValue();
unsigned EVTBits = ExtVT.getScalarSizeInBits();
// Is the shift amount a multiple of the size of ExtVT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of the size of ExtVT?
if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
return SDValue();
}
// At this point, we must have a load or else we can't do the transform.
auto *LN0 = dyn_cast<LoadSDNode>(N0);
if (!LN0) return SDValue();
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
if (LN0->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
return SDValue();
// If the SRL is only used by a masking AND, we may be able to adjust
// the ExtVT to make the AND redundant.
SDNode *Mask = *(SRL->use_begin());
if (Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
// If the mask is smaller, recompute the type.
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
ExtVT = MaskedVT;
}
}
}
}
// If the load is shifted left (and the result isn't shifted back right),
// we can fold the truncate through the shift.
unsigned ShLeftAmt = 0;
if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShLeftAmt = N01->getZExtValue();
N0 = N0.getOperand(0);
}
}
// If we haven't found a load, we can't narrow it.
if (!isa<LoadSDNode>(N0))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Reducing the width of a volatile load is illegal. For atomics, we may be
// able to reduce the width provided we never widen again. (see D66309)
if (!LN0->isSimple() ||
!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
unsigned LVTStoreBits =
LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
uint64_t PtrOff = ShAmt / 8;
Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
TypeSize::Fixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
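// Re-issue the load with the narrower (possibly extending) value type at the
// adjusted pointer, reusing the original chain, flags and AA info.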
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
NewAlign, LN0->getMemOperand()->getFlags(),
LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
Result = DAG.getNode(ISD::SHL, DL, VT,
Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
if (HasShiftedOffset) {
// Recalculate the shift amount after it has been altered to calculate
// the offset.
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in
// the register.
SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
}
// Return the new loaded value.
return Result;
}
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT ExtVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarSizeInBits();
unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
// sext_in_reg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// fold (sext_in_reg c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
N1);
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
if ((N00Bits <= ExtVTBits ||
(N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
unsigned DstElts = N0.getValueType().getVectorMinNumElements();
unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
(N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
}
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1));
}
}
// fold (sext_inreg (extload x)) -> (sextload x)
// If sextload is not supported by the target, we can only do the combine
// when the load has one use. Doing otherwise can block folding the extload
// with other extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
// ignore it if the masked load is already sign extended
if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
SDValue ExtMaskedLoad = DAG.getMaskedLoad(
VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
CombineTo(N, ExtMaskedLoad);
CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
if (SDValue(GN0, 0).hasOneUse() &&
ExtVT == GN0->getMemoryVT() &&
TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
SDValue ExtLoad = DAG.getMaskedGather(
DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
AddToWorklist(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
return SDValue();
}
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
if (SrcVT == VT)
return N0;
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// fold (truncate c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
if (C.getNode() != N)
return C;
}
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
// if the source is smaller than the dest, we still need an extend.
if (N0.getOperand(0).getValueType().bitsLT(VT))
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// if the source is larger than the dest, then we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// if the source and dest are the same type, we can drop both the extend
// and the truncate.
return N0.getOperand(0);
}
// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
return SDValue();
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
// -- becomes --
// v16i8 b = BITCAST (v2i64 val)
// i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
//
// Note: We only run this optimization after type legalization (which often
// creates this pattern) and before operation legalization, after which we
// need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
auto EltCnt = VecTy.getVectorElementCount();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
auto NewEltCnt = EltCnt * SizeRatio;
EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
DAG.getBitcast(NVT, N0.getOperand(0)),
DAG.getVectorIdxConstant(Index, DL));
}
}
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
SDLoc SL(N0);
SDValue Cond = N0.getOperand(0);
SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
}
}
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
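// Only safe when the shift amount is known to fit in Log2(Size) bits, which
// guarantees it is smaller than the narrow type's bit width.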
if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
if (AmtVT != Amt.getValueType()) {
Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
AddToWorklist(Amt.getNode());
}
return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
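// See if the value feeding this truncate can instead be expressed as an
// unsigned saturating subtract of the narrow type.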
if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
return V;
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
// Avoid creating illegal types if running after type legalizer.
(!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
for (const SDValue &Op : N0->op_values()) {
SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
TruncOps.push_back(TruncOp);
}
return DAG.getBuildVector(VT, DL, TruncOps);
}
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
// (2xi32 (buildvector x, y)).
if (Level == AfterLegalizeVectorOps && VT.isVector() &&
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N0.getOperand(0).hasOneUse()) {
SDValue BuildVect = N0.getOperand(0);
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
EVT TruncVecEltTy = VT.getVectorElementType();
// Check that the element types match.
if (BuildVectEltTy == TruncVecEltTy) {
// Now we only need to compute the offset of the truncated elements.
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
}
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
APInt Mask =
APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
LN0->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
return NewLoad;
}
}
}
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
SmallVector<EVT, 8> VTs;
SDValue V;
unsigned Idx = 0;
unsigned NumDefs = 0;
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
SDValue X = N0.getOperand(i);
if (!X.isUndef()) {
V = X;
Idx = i;
NumDefs++;
}
// Stop if more than one member is non-undef.
if (NumDefs > 1)
break;
VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
X.getValueType().getVectorElementCount()));
}
if (NumDefs == 0)
return DAG.getUNDEF(VT);
if (NumDefs == 1) {
assert(V.getNode() && "The single defined operand is empty!");
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
if (i != Idx) {
Opnds.push_back(DAG.getUNDEF(VTs[i]));
continue;
}
SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
AddToWorklist(NV.getNode());
Opnds.push_back(NV);
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
}
}
// Fold truncate of a bitcast of a vector to an extract of the low vector
// element.
//
// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
EVT VecSrcVT = VecSrc.getValueType();
if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
DAG.getVectorIdxConstant(Idx, SL));
}
}
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
// (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
// When the adde's carry is not used.
if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
// We only do this for addcarry before operation legalization.
((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
TLI.isOperationLegal(N0.getOpcode(), VT))) {
SDLoc SL(N);
auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
auto VTs = DAG.getVTList(VT, N0->getValueType(1));
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
// do not fully cancel each other out.
if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::SIGN_EXTEND ||
N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) {
if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
VT.getVectorElementType())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
N00.getOperand(0), N0.getOperand(1));
}
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
// Narrow a suitable binary operation with a non-opaque constant operand by
// moving it ahead of the truncate. This is limited to pre-legalization
// because targets may prefer a wider type during later combines and invert
// this transform.
switch (N0.getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
if (!LegalOperations && N0.hasOneUse() &&
(isConstantOrConstantVector(N0.getOperand(0), true) ||
isConstantOrConstantVector(N0.getOperand(1), true))) {
// TODO: We already restricted this to pre-legalization, but for vectors
// we are extra cautious to not create an unsupported operation.
// Target-specific changes are likely needed to avoid regressions here.
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDLoc DL(N);
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
}
}
break;
case ISD::USUBSAT:
// Truncate the USUBSAT only if LHS is a known zero-extension; it's not
// enough to know that the upper bits are zero, we must ensure that we don't
// introduce an extra truncate.
if (!LegalOperations && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
VT.getScalarSizeInBits() &&
hasOperation(N0.getOpcode(), VT)) {
return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
DAG, SDLoc(N));
}
break;
}
return SDValue();
}
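/// Return the node that produces operand \p i of the BUILD_PAIR \p N,
/// looking through any intervening MERGE_VALUES node.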
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue Elt = N->getOperand(i);
if (Elt.getOpcode() != ISD::MERGE_VALUES)
return Elt.getNode();
return Elt.getOperand(Elt.getResNo()).getNode();
}
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the most
// significant part in elt 1, so when combining into one large load we need
// to consider the endianness.
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
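// LD2 must start exactly LD1Bytes after LD1 so the pair forms one
// contiguous, non-volatile region of memory.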
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
Align Alignment = LD1->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Alignment &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
LD1->getPointerInfo(), Alignment);
}
return SDValue();
}
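/// Return the EXTRACT_ELEMENT index of the i64 half that holds the Hi part
/// of a ppcf128 value after it has been bitcast to an integer pair; the
/// index depends on the target's endianness.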
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
// On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
// and Lo parts; on big-endian machines it doesn't.
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
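/// If this bitcast wraps integer sign-bit logic (and/xor/or with a sign-mask
/// constant) applied to a bitcast FP value, rewrite it as FABS, FNEG or
/// FNEG(FABS) when the target has bit-preserving FP logic.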
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
return SDValue();
// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
SDValue N0 = N->getOperand(0);
EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
unsigned FPOpcode;
APInt SignMask;
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
case ISD::OR:
FPOpcode = ISD::FABS;
SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
break;
default:
return SDValue();
}
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
// Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
// fneg (fabs X)
SDValue LogicOp0 = N0.getOperand(0);
ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
LogicOp0.getOpcode() == ISD::BITCAST &&
LogicOp0.getOperand(0).getValueType() == VT) {
SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
NumFPLogicOpsConv++;
if (N0.getOpcode() == ISD::OR)
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
return FPOp;
}
return SDValue();
}
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.isUndef())
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize types, unless both types are integer and the
// scalar type is legal. Only do this before legalize ops, since the target
// may be depending on the bitcast.
// First check to see if this is all constant.
// TODO: Support FP bitcasts after legalize types.
if (VT.isVector() &&
(!LegalTypes ||
(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
// If the input is a constant, let getNode fold it.
if (isIntOrFPConstant(N0)) {
// If we can't allow illegal operations, we need to check that this is just
// a fp -> int or int -> fp conversion and that the resulting operation will
// be legal.
if (!LegalOperations ||
(isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
(isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
TLI.isOperationLegal(ISD::Constant, VT))) {
SDValue C = DAG.getBitcast(VT, N0);
if (C.getNode() != N)
return C;
}
}
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
return DAG.getBitcast(VT, N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
// if the resultant load doesn't need a higher alignment than the original.
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not remove the cast if the types differ in endian layout.
TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
// If the load is volatile, we only want to change the load type if the
// resulting load is legal. Otherwise we might increase the number of
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
}
if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
return V;
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
//
// For ppc_fp128:
// fold (bitcast (fneg x)) ->
// flipbit = signbit
// (xor (bitcast x) (build_pair flipbit, flipbit))
//
// fold (bitcast (fabs x)) ->
// flipbit = (and (extract_element (bitcast x), 0), signbit)
// (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
N0.getNode()->hasOneUse() && VT.isInteger() &&
!VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
assert(VT.getSizeInBits() == 128);
SDValue SignBit = DAG.getConstant(
APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
SDValue FlipBit;
if (N0.getOpcode() == ISD::FNEG) {
FlipBit = SignBit;
AddToWorklist(FlipBit.getNode());
} else {
assert(N0.getOpcode() == ISD::FABS);
SDValue Hi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(NewConv)));
AddToWorklist(Hi.getNode());
FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
AddToWorklist(FlipBit.getNode());
}
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
}
APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
return DAG.getNode(ISD::AND, DL, VT,
NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
//
// For ppc_fp128:
// fold (bitcast (fcopysign cst, x)) ->
// flipbit = (and (extract_element
// (xor (bitcast cst), (bitcast x)), 0),
// signbit)
// (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
AddToWorklist(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
unsigned VTWidth = VT.getSizeInBits();
if (OrigXWidth < VTWidth) {
X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
AddToWorklist(X.getNode());
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
SDLoc DL(X);
X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
DAG.getConstant(OrigXWidth-VTWidth, DL,
X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
}
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(Cst.getNode());
SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
AddToWorklist(X.getNode());
SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
AddToWorklist(XorResult.getNode());
SDValue XorResult64 = DAG.getNode(
ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
SDLoc(XorResult)));
AddToWorklist(XorResult64.getNode());
SDValue FlipBit =
DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
AddToWorklist(FlipBit.getNode());
SDValue FlipBits =
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
}
APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
if (N0.getOpcode() == ISD::BUILD_PAIR)
if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
// Remove double bitcasts from shuffles - this is often a legacy of
// XformToShuffleWithZero being used to combine bitmaskings (of
// float vectors bitcast to integer vectors) into shuffles.
// bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
!(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
// If operands are a bitcast, peek through if it casts the original VT.
// If operands are a constant, just bitcast back to original VT.
auto PeekThroughBitcast = [&](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return DAG.getBitcast(VT, Op);
return SDValue();
};
// FIXME: If either input vector is bitcast, try to convert the shuffle to
// the result type of this bitcast. This would eliminate at least one
// bitcast. See the transform in InstCombine.
SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
if (!(SV0 && SV1))
return SDValue();
int MaskScale =
VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
SmallVector<int, 8> NewMask;
for (int M : SVN->getMask())
for (int i = 0; i != MaskScale; ++i)
NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
return SDValue();
}
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
}
SDValue DAGCombiner::visitFREEZE(SDNode *N) {
SDValue N0 = N->getOperand(0);
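// Fold freeze(x) -> x when x is known to be neither undef nor poison.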
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
return SDValue();
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
unsigned SrcBitSize = SrcEltVT.getSizeInBits();
unsigned DstBitSize = DstEltVT.getSizeInBits();
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
BV->getValueType(0).getVectorNumElements());
return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
// Otherwise, we're growing or shrinking the elements. To avoid having to
// handle annoying details of growing/shrinking FP values, we convert them to
// int first.
if (SrcEltVT.isFloatingPoint()) {
// Convert the input float vector to an int vector where the elements are
// the same size.
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
// Now we know the input is an integer vector. If the output is an FP type,
// convert to integer first, then to FP of the right size.
if (DstEltVT.isFloatingPoint()) {
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
SDLoc DL(BV);
// Okay, we know the src/dst types are both integers of differing types.
// Handle growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
if (SrcBitSize < DstBitSize) {
unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
i += NumInputsPerOutput) {
bool isLE = DAG.getDataLayout().isLittleEndian();
APInt NewBits = APInt(DstBitSize, 0);
bool EltIsUndef = true;
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
// Shift the previously computed bits over.
NewBits <<= SrcBitSize;
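// Pick the operand that supplies the next most significant chunk of the
// wide element; on little-endian targets the operands are walked in
// reverse.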
SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
if (Op.isUndef()) continue;
EltIsUndef = false;
NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
return DAG.getBuildVector(VT, DL, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
// turns into multiple outputs.
unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
for (const SDValue &Op : BV->op_values()) {
if (Op.isUndef()) {
Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
APInt OpVal = cast<ConstantSDNode>(Op)->
getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal.lshrInPlace(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
if (DAG.getDataLayout().isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
return DAG.getBuildVector(VT, DL, Ops);
}
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
} else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
E = N0;
}
if (FMA && E) {
SDValue A = FMA.getOperand(0);
SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);
SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
N1);
}
}
// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
N0);
}
}
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
}
}
}
// fold (fadd (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
}
}
}
// fold (fadd x, (fma y, z, (fpext (fmul u, v))))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
}
}
}
// fold (fadd x, (fpext (fma y, z, (fmul u, v))))
// -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
}
}
}
}
return SDValue();
}
/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
const SDNodeFlags Flags = N->getFlags();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath || HasFMAD);
// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
if (N.getOpcode() != ISD::FMUL)
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
YZ.getOperand(1), X);
}
return SDValue();
};
// If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
(N0.getNode()->use_size() > N1.getNode()->use_size())) {
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
if (SDValue V = tryToFoldXYSubZ(N0, N1))
return V;
} else {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (SDValue V = tryToFoldXYSubZ(N0, N1))
return V;
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
}
// fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
}
// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
// fold (fsub (fpext (fneg (fmul x, y))), z)
// -> (fneg (fma (fpext x), (fpext y), z))
// Note: This could be removed with appropriate canonicalization of the
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// us from implementing the canonicalization in visitFSUB.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
N1));
}
}
}
// fold (fsub (fneg (fpext (fmul x, y))), z)
// -> (fneg (fma (fpext x), (fpext y), z))
// Note: This could be removed with appropriate canonicalization of the
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
// us from implementing the canonicalization in visitFSUB.
if (N0.getOpcode() == ISD::FNEG) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
return DAG.getNode(
ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
N1));
}
}
}
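// The wider FMA chains below change the order of operations, so they are
// only formed when reassociation is allowed globally or on the node.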
auto isReassociable = [Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
auto isContractableAndReassociableFMUL = [isContractableFMUL,
isReassociable](SDValue N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y, (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub (fpext (fma x, y, (fmul u, v))), z)
// -> (fma (fpext x), (fpext y),
// (fma (fpext u), (fpext v), (fneg z)))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
// fold (fsub x, (fpext (fma y, z, (fmul u, v))))
// -> (fma (fneg (fpext y)), (fpext z),
// (fma (fneg (fpext u)), (fpext v), x))
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(
PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
return SDValue();
}
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
const TargetOptions &Options = DAG.getTarget().Options;
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
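// e.g. (x + 1.0) * y with x == 0.0 and y == +inf evaluates to +inf, but the
// fused fma(x, y, y) first computes 0.0 * +inf == NaN.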
if (!Options.NoInfsFPMath)
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// Floating-point multiply-add with intermediate rounding. This can result
// in a less precise result due to the changed rounding order.
bool HasFMAD = Options.UnsafeFPMath &&
(LegalOperations && TLI.isFMADLegal(DAG, N));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
// Always prefer FMAD to FMA for precision.
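// (ISD::FMAD returns the same result as the separately rounded multiply and
// add, so it preserves the numerical behavior of the unfused code, whereas
// ISD::FMA skips the intermediate rounding step.)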
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
auto FuseFADD = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
Y);
if (C->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
}
}
return SDValue();
};
if (SDValue FMA = FuseFADD(N0, N1))
return FMA;
if (SDValue FMA = FuseFADD(N1, N0))
return FMA;
// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
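// e.g. (1.0 - x1) * y == y - x1*y == fma(-x1, y, y), and
// (x0 - 1.0) * y == x0*y - y == fma(x0, y, -y).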
auto FuseFSUB = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
if (C0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
Y);
if (C0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
}
if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
DAG.getNode(ISD::FNEG, SL, VT, Y));
if (C1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
Y);
}
}
return SDValue();
};
if (SDValue FMA = FuseFSUB(N0, N1))
return FMA;
if (SDValue FMA = FuseFSUB(N1, N0))
return FMA;
return SDValue();
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (fadd A, (fneg B)) -> (fsub A, B)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
// fold (fadd (fneg A), B) -> (fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
return false;
auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
return C && C->isExactlyValue(-2.0);
};
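// In the two folds below, A + B*(-2.0) == A - 2*B == A - (B + B), so the
// multiply by a constant becomes a single add of B to itself.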
// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
if (isFMulNegTwo(N0)) {
SDValue B = N0.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
}
// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
if (isFMulNegTwo(N1)) {
SDValue B = N1.getOperand(0);
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
}
// No FP constant should be created after legalization as the Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
// If nnan is enabled, fold lots of things.
if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);
}
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
}
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
// of rounding steps.
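// e.g. (fadd (fmul x, c), x) rounds once after the multiply and once after
// the add, while the folded (fmul x, c+1.0) rounds only once, so the results
// can differ.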
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT));
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD) {
bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT, N1,
DAG.getConstantFP(3.0, DL, VT));
}
}
if (N1.getOpcode() == ISD::FADD) {
bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(3.0, DL, VT));
}
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N0 = N->getOperand(1);
SDValue N1 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT ChainVT = N->getValueType(1);
SDLoc DL(N);
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize)) {
return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
{Chain, N0, NegN1});
}
// fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize)) {
return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
{Chain, N1, NegN0});
}
return SDValue();
}
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
return N0;
}
}
if (N0 == N1) {
// (fsub x, x) -> 0.0
if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
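// e.g. when output denormals are flushed to zero, (fsub 0.0, d) with a
// denormal d yields a zero, whereas (fneg d) only flips the sign bit and
// keeps the denormal value.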
// FIXME: This transform will change the sign of a NaN and the behavior
// of a signaling NaN. It is only valid when a NoNaN flag is present.
DenormalMode DenormMode = DAG.getDenormalMode(VT);
if (DenormMode == DenormalMode::getIEEE()) {
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
return NegN1;
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1);
}
}
}
if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
if (N0 == N1->getOperand(1))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
}
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
// Avoid an infinite loop by making sure that N00 is not a constant
// (the inner multiply has not been constant folded yet).
if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
// Match a special-case: we convert X * 2.0 into fadd.
// fmul (fadd X, X), C -> fmul X, 2.0 * C
if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
(N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
TLI.isOperationLegal(ISD::FABS, VT)) {
SDValue Select = N0, X = N1;
if (Select.getOpcode() != ISD::SELECT)
std::swap(Select, X);
SDValue Cond = Select.getOperand(0);
auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
if (TrueOpnd && FalseOpnd &&
Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETOLT:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETULE:
case ISD::SETLT:
case ISD::SETLE:
std::swap(TrueOpnd, FalseOpnd);
LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETUGT:
case ISD::SETOGE:
case ISD::SETUGE:
case ISD::SETGT:
case ISD::SETGE:
if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT,
DAG.getNode(ISD::FABS, DL, VT, X));
if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
return DAG.getNode(ISD::FABS, DL, VT, X);
break;
}
}
}
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
}
SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
bool UnsafeFPMath =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
return N2;
}
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (N0.getOpcode() == ISD::FMUL &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
N2);
}
}
// (fma x, 1.0, y) -> (fadd x, y)
// (fma x, -1.0, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
// fma (fneg x), K, y -> fma x, -K, y
if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
return DAG.getNode(
ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
return DAG.getNode(
ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
}
}
// fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
// fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
return DAG.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// TODO: Limit this transform based on optsize/minsize - it always creates at
// least 1 extra instruction. But the perf win may be substantial enough
// that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
unsigned MinUses = TLI.combineRepeatedFPDivisors();
// For splat vectors, scale the number of uses by the splat factor. If we can
// convert the division into a scalar op, that will likely be much faster.
unsigned NumElts = 1;
EVT VT = N->getValueType(0);
if (VT.isVector() && DAG.isSplatValue(N1))
NumElts = VT.getVectorNumElements();
if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
// Find all FDIV users of the same divisor.
// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
// Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
U->getOperand(0) == U->getOperand(1).getOperand(0) &&
U->getFlags().hasAllowReassociation() &&
U->getFlags().hasNoSignedZeros())
continue;
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
Users.insert(U);
}
}
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
if ((Users.size() * NumElts) < MinUses)
return SDValue();
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
// Dividend / Divisor -> Dividend * Reciprocal
for (auto *U : Users) {
SDValue Dividend = U->getOperand(0);
if (Dividend != FPOne) {
SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
Reciprocal, Flags);
CombineTo(U, NewNode);
} else if (U != Reciprocal.getNode()) {
// In the absence of fast-math-flags, this user node is always the
// same node as Reciprocal, but with FMF they may be different nodes.
CombineTo(U, Reciprocal);
}
}
return SDValue(N, 0); // N was replaced.
}
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (SDValue V = combineRepeatedFPDivisors(N))
return V;
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is a legal fp immediate that
// isn't too nasty (e.g. NaN, denormal, ...).
if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV =
buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV =
buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
// it's still worthwhile to get rid of the FSQRT if possible.
SDValue Sqrt, Y;
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
Sqrt = N1.getOperand(0);
Y = N1.getOperand(1);
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
Sqrt = N1.getOperand(1);
Y = N1.getOperand(0);
}
if (Sqrt.getNode()) {
// If the other multiply operand is known positive, pull it into the
// sqrt. That will eliminate the division if we convert to an estimate.
if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
SDValue A;
if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
A = Y.getOperand(0);
else if (Y == Sqrt.getOperand(0))
A = Y;
if (A) {
// X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
// X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
SDValue AAZ =
DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
// Estimate creation failed. Clean up speculatively created nodes.
recursivelyDeleteUnusedNodes(AAZ.getNode());
}
}
// We found an FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
if (Options.NoInfsFPMath || Flags.hasNoInfs())
if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
return RV;
}
// Fold X/Sqrt(X) -> Sqrt(X)
if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
(Options.UnsafeFPMath || Flags.hasAllowReassociation()))
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
return N1;
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
TargetLowering::NegatibleCost CostN1 =
TargetLowering::NegatibleCost::Expensive;
SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
return SDValue();
}
SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
return SDValue();
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
// TODO: If this is N0/sqrt(N0), and we reach this node before trying to
// transform the fdiv, we may produce a sub-optimal estimate sequence
// because the reciprocal calculation may not have to filter out a
// 0.0 input.
return buildSqrtEstimate(N0, Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
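/// These folds are valid because only the sign bit of the second operand is
/// consumed, and fp_extend / fp_round do not change that sign.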
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue N1 = N->getOperand(1);
if ((N1.getOpcode() == ISD::FP_EXTEND ||
N1.getOpcode() == ISD::FP_ROUND)) {
EVT N1VT = N1->getValueType(0);
EVT N1Op0VT = N1->getOperand(0).getValueType();
// Always fold no-op FP casts.
if (N1VT == N1Op0VT)
return true;
// Do not optimize out type conversion of f128 type yet.
// For some targets like x86_64, configuration is changed to keep one f128
// value in one SSE register, but instruction selection cannot handle
// FCOPYSIGN on SSE registers yet.
if (N1Op0VT == MVT::f128)
return false;
// Avoid mismatched vector operand types, for better instruction selection.
if (N1Op0VT.isVector())
return false;
return true;
}
return false;
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
const APFloat &V = N1C->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
} else {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
}
}
// copysign(fabs(x), y) -> copysign(x, y)
// copysign(fneg(x), y) -> copysign(x, y)
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitFPOW(SDNode *N) {
ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
if (!ExponentC)
return SDValue();
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Try to convert x ** (1/3) into cube root.
// TODO: Handle the various flavors of long double.
// TODO: Since we're approximating, we don't need an exact 1/3 exponent.
// Some range near 1/3 should be fine.
EVT VT = N->getValueType(0);
if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
(VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
// pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
// pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
// pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf nnan afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
!Flags.hasApproximateFuncs())
return SDValue();
// Do not create a cbrt() libcall if the target does not have it, and do not
// turn a pow that has lowering support into a cbrt() libcall.
if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
(!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
return SDValue();
return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
}
// Try to convert x ** (1/4) and x ** (3/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
// power-of-2 fractional exponents.
bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
if (ExponentIs025 || ExponentIs075) {
// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
// pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
// pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
// We only need no signed zeros for the 0.25 case.
if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
!Flags.hasApproximateFuncs())
return SDValue();
// Don't double the number of libcalls. We are trying to inline fast code.
if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
return SDValue();
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
if (ForCodeSize)
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
if (ExponentIs025)
return SqrtSqrt;
// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
}
return SDValue();
}
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
// This optimization is guarded by a function attribute because it may produce
// unexpected results. I.e., programs may be relying on the platform-specific
// undefined behavior when the float-to-int conversion overflows.
const Function &F = DAG.getMachineFunction().getFunction();
Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
if (StrictOverflow.getValueAsString().equals("false"))
return SDValue();
// We only do this if the target has legal ftrunc. Otherwise, we'd likely be
// replacing casts with a libcall. We also must be allowed to ignore -0.0
// because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
// conversions would return +0.0.
// FIXME: We should be able to use node-level FMF here.
// TODO: If strict math, should we use FABS (+ range check for signed cast)?
EVT VT = N->getValueType(0);
if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
!DAG.getTarget().Options.NoSignedZerosFPMath)
return SDValue();
// fptosi/fptoui round towards zero, so converting from FP to integer and
// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
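// e.g. sitofp(fptosi(2.75)) == 2.0 == ftrunc(2.75).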
SDValue N0 = N->getOperand(0);
if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
N0.getOperand(0).getValueType() == VT)
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
N0.getOperand(0).getValueType() == VT)
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
return SDValue();
}
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
// but UINT_TO_FP is legal on this target, try to convert.
if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
hasOperation(ISD::UINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to UINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
// fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
// (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0.getOperand(0),
DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
return FTrunc;
return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// [us]itofp(undef) = 0, because the result value is bounded.
if (N0.isUndef())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
// but SINT_TO_FP is legal on this target, try to convert.
if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
hasOperation(ISD::SINT_TO_FP, OpVT)) {
// If the sign bit is known to be zero, we can change this to SINT_TO_FP.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
// fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
return FTrunc;
return SDValue();
}
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
return SDValue();
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
// We can safely assume the conversion won't overflow the output range,
// because (for example) (uint8_t)18293.f is undefined behavior.
// Since we can assume the conversion won't overflow, our decision as to
// whether the input will fit in the float should depend on the minimum
// of the input range and output range.
// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.
unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
unsigned ActualSize = std::min(InputSize, OutputSize);
const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
// We can only fold away the float conversion if the input range can be
// represented exactly in the float range.
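// e.g. every i16 value is exactly representable in f32 (24-bit significand),
// so (fp_to_sint (sint_to_fp x:i16)) with an i32 result folds to a sign
// extension of x.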
if (APFloat::semanticsPrecision(sem) >= ActualSize) {
if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
}
if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
return DAG.getBitcast(VT, Src);
}
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint undef) -> undef
if (N0.isUndef())
return DAG.getUNDEF(VT);
// fold (fp_to_sint c1fp) -> c1
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint undef) -> undef
if (N0.isUndef())
return DAG.getUNDEF(VT);
// fold (fp_to_uint c1fp) -> c1
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
// fold (fp_round (fp_extend x)) -> x
if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
return N0.getOperand(0);
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
// Skip this folding if it results in an fp_round from f80 to f16.
//
// f80 to f16 always generates an expensive (and as yet, unimplemented)
// libcall to __truncxfhf2 instead of selecting native f16 conversion
// instructions from f32 or f64. Moreover, the first (value-preserving)
// fp_round from f80 to either f32 or f64 may become a NOP in platforms like
// x86.
if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
return SDValue();
// If the first fp_round isn't a value preserving truncation, it might
// introduce a tie in the second fp_round that wouldn't occur in the
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
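// e.g. an f80 value rounded to f64 and then to f32 can differ from the same
// value rounded directly to f32 when the first rounding lands exactly halfway
// between two f32 values.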
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
SDLoc DL(N);
return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
}
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
Tmp, N0.getOperand(1));
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
return SDValue();
}
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If this fp_extend is only used by an fp_round, don't fold it; let the
// fp_round fold the pair instead.
if (N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fp_extend c1fp) -> c1fp
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
if (N0.getOpcode() == ISD::FP16_TO_FP &&
TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
&& N0.getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
In, N0.getOperand(1));
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
}
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
N0.getValueType(), ExtLoad,
DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
return SDValue();
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
// fold ftrunc (known rounded int x) -> x
// ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
// likely to be generated when extracting an integer from an already-rounded
// floating-point value.
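// e.g. ftrunc(fceil(x)) == fceil(x), since fceil(x) is already an
// integer-valued float.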
switch (N0.getOpcode()) {
default: break;
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
case ISD::FCEIL:
return N0;
}
return SDValue();
}
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold FNEG.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (SDValue NegN0 =
TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
return NegN0;
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
if (N0.getOpcode() == ISD::FSUB &&
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
N0.getOperand(0));
}
if (SDValue Cast = foldSignChangeInBitcast(N))
return Cast;
return SDValue();
}
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
APFloat (*Op)(const APFloat &, const APFloat &)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
}
// Canonicalize to constant on RHS.
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
if (N1CFP) {
const APFloat &AF = N1CFP->getValueAPF();
// minnum(X, nan) -> X
// maxnum(X, nan) -> X
// minimum(X, nan) -> nan
// maximum(X, nan) -> nan
if (AF.isNaN())
return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
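// e.g. with ninf set, minnum(X, -largest_finite) == -largest_finite, since X
// cannot be any smaller.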
if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
return N->getOperand(1);
// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
return N->getOperand(0);
}
}
return SDValue();
}
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
return visitFMinMax(DAG, N, minnum);
}
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
return visitFMinMax(DAG, N, maxnum);
}
SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
return visitFMinMax(DAG, N, minimum);
}
SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
return visitFMinMax(DAG, N, maximum);
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
return N->getOperand(0);
// fold (fabs (fneg x)) -> (fabs x)
// fold (fabs (fcopysign x, y)) -> (fabs x)
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
if (SDValue Cast = foldSignChangeInBitcast(N))
return Cast;
return SDValue();
}
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
// BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
// nondeterministic jumps).
if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
N1->getOperand(0), N2);
}
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
TLI.isOperationLegalOrCustom(ISD::BR_CC,
N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
}
if (N1.hasOneUse()) {
// rebuildSetCC calls visitXor which may change the Chain when there is a
// STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
HandleSDNode ChainHandle(Chain);
if (SDValue NewN1 = rebuildSetCC(N1))
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
ChainHandle.getValue(), NewN1, N2);
}
return SDValue();
}
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (N.getOpcode() == ISD::SRL ||
(N.getOpcode() == ISD::TRUNCATE &&
(N.getOperand(0).hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::SRL))) {
// Look past the truncate.
if (N.getOpcode() == ISD::TRUNCATE)
N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
// %a = ...
// %b = and i32 %a, 2
// %c = srl i32 %b, 1
// brcond i32 %c ...
//
// into
//
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
// brcond %c ...
//
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
}
}
}
}
// Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
// Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
if (N.getOpcode() == ISD::XOR) {
// Because we may call this on a speculatively constructed
// SimplifiedSetCC Node, we need to simplify this node first.
// Ideally this should be folded into SimplifySetCC and not
// here. For now, grab a handle to N so we don't lose it from
// replacements internal to the visit.
HandleSDNode XORHandle(N);
while (N.getOpcode() == ISD::XOR) {
SDValue Tmp = visitXOR(N.getNode());
// No simplification done.
if (!Tmp.getNode())
break;
// Returning N is a form of in-visit replacement that may invalidate
// N. Grab the value from the handle.
if (Tmp.getNode() == N.getNode())
N = XORHandle.getValue();
else // Node simplified. Try simplifying again.
N = Tmp;
}
if (N.getOpcode() != ISD::XOR)
return N;
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
// (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
Op0.getValueType() == MVT::i1) {
N = Op0;
Op0 = N->getOperand(0);
Op1 = N->getOperand(1);
Equal = true;
}
EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
// Replace the uses of XOR with SETCC
return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
}
}
return SDValue();
}
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
// If we did this folding here, it would be necessary to update the
// MachineBasicBlock CFG, which is awkward.
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), SDLoc(N),
false);
if (Simp.getNode()) AddToWorklist(Simp.getNode());
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
N->getOperand(0), Simp.getOperand(2),
Simp.getOperand(0), Simp.getOperand(1),
N->getOperand(4));
return SDValue();
}
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
bool &IsLoad, bool &IsMasked, SDValue &Ptr,
const TargetLowering &TLI) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
EVT VT = LD->getMemoryVT();
if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
EVT VT = ST->getMemoryVT();
if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
IsLoad = false;
} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
if (LD->isIndexed())
return false;
EVT VT = LD->getMemoryVT();
if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
!TLI.isIndexedMaskedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
IsMasked = true;
} else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
if (ST->isIndexed())
return false;
EVT VT = ST->getMemoryVT();
if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
!TLI.isIndexedMaskedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
IsLoad = false;
IsMasked = true;
} else {
return false;
}
return true;
}
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool IsLoad = true;
bool IsMasked = false;
SDValue Ptr;
if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
Ptr, TLI))
return false;
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
Ptr.getNode()->hasOneUse())
return false;
// Ask the target to do addressing mode selection.
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
// Backends without true r+i pre-indexed forms may need to pass a
// constant base with a variable offset so that constant coercion
// will work with the patterns in canonical form.
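// For example (hypothetical values), if the target reports BasePtr = 0x1000
// (a constant) and Offset = %reg, the swap just below lets the analysis that
// follows see the canonical (variable base, constant offset) shape; the
// operands are swapped back before the indexed node is actually built.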
bool Swapped = false;
if (isa<ConstantSDNode>(BasePtr)) {
std::swap(BasePtr, Offset);
Swapped = true;
}
// Don't create an indexed load / store with zero offset.
if (isNullConstant(Offset))
return false;
// Try turning it into a pre-indexed load / store except when:
// 1) The new base ptr is a frame index.
// 2) If N is a store and the new base ptr is either the same as or is a
// predecessor of the value being stored.
// 3) Another use of old base ptr is a predecessor of N. If ptr is folded
// that would create a cycle.
// 4) All uses are load / store ops that use it as old base ptr.
// Check #1. Preinc'ing a frame index would require copying the stack pointer
// (plus the implicit offset) to a register to preinc anyway.
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
return false;
// Check #2.
if (!IsLoad) {
SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
: cast<StoreSDNode>(N)->getValue();
// Would require a copy.
if (Val == BasePtr)
return false;
// Would create a cycle.
if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
return false;
}
// Caches for hasPredecessorHelper.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Worklist.push_back(N);
// If the offset is a constant, there may be other adds of constants that
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
UE = BasePtr.getNode()->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
// Skip the use that is Ptr and uses of other results from BasePtr's
// node (important for nodes that return multiple results).
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
Use.getUser()->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
}
// FIXME: In some cases, we can be smarter about this.
if (Op1.getValueType() != Offset.getValueType()) {
OtherUses.clear();
break;
}
OtherUses.push_back(Use.getUser());
}
if (Swapped)
std::swap(BasePtr, Offset);
// Now check for #3 and #4.
bool RealUse = false;
for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;
if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
return false;
// If Ptr may be folded in addressing mode of other use, then it's
// not profitable to do this transformation.
if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
if (!RealUse)
return false;
SDValue Result;
if (!IsMasked) {
if (IsLoad)
Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
else
Result =
DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
} else {
if (IsLoad)
Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM);
else
Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM);
}
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
deleteAndRecombine(N);
if (Swapped)
std::swap(BasePtr, Offset);
// Replace other uses of BasePtr that can be updated to use Ptr
for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
unsigned OffsetIdx = 1;
if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
OffsetIdx = 0;
assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
BasePtr.getNode() && "Expected BasePtr operand");
// We need to replace ptr0 in the following expression:
// x0 * offset0 + y0 * ptr0 = t0
// knowing that
// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
//
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
// indexed load/store and the expression that needs to be re-written.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
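//
// As a worked example with hypothetical constants: for a PRE_INC form
// t1 = ptr0 + 8 (x1 = y1 = 1, offset1 = 8) and another use
// t0 = ptr0 + 12 (x0 = y0 = 1, offset0 = 12), the formula gives
// t0 = (12 - 8) + t1, i.e. the other add is rewritten as t1 + 4.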
auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
APInt CNV = Offset0;
if (X0 < 0) CNV = -CNV;
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
SDLoc DL(OtherUses[i]);
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
}
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
AddToWorklist(Result.getNode());
return true;
}
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
SDValue &BasePtr, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG,
const TargetLowering &TLI) {
if (PtrUse == N ||
(PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
return false;
if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
return false;
// Don't create an indexed load / store with zero offset.
if (isNullConstant(Offset))
return false;
if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
return false;
SmallPtrSet<const SDNode *, 32> Visited;
for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
// Don't do this if there's a later user which could perform the index instead.
if (isa<MemSDNode>(Use)) {
bool IsLoad = true;
bool IsMasked = false;
SDValue OtherPtr;
if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, OtherPtr, TLI)) {
SmallVector<const SDNode *, 2> Worklist;
Worklist.push_back(Use);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
return false;
}
}
// If all the uses are load / store addresses, then don't do the
// transformation.
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
for (SDNode *UseUse : Use->uses())
if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
return false;
}
}
return true;
}
static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
bool &IsMasked, SDValue &Ptr,
SDValue &BasePtr, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG,
const TargetLowering &TLI) {
if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, Ptr, TLI) ||
Ptr.getNode()->hasOneUse())
return nullptr;
// Try turning it into a post-indexed load / store except when
// 1) All uses are load / store ops that use it as base ptr (and
// it may be folded as an addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
for (SDNode *Op : Ptr->uses()) {
// Check for #1.
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
continue;
// Check for #2.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
// Ptr is predecessor to both N and Op.
Visited.insert(Ptr.getNode());
Worklist.push_back(N);
Worklist.push_back(Op);
if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
!SDNode::hasPredecessorHelper(Op, Visited, Worklist))
return Op;
}
return nullptr;
}
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the add/sub
/// are redirected to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
bool IsLoad = true;
bool IsMasked = false;
SDValue Ptr;
SDValue BasePtr;
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
Offset, AM, DAG, TLI);
if (!Op)
return false;
SDValue Result;
if (!IsMasked)
Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
Offset, AM)
: DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
else
Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM)
: DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
deleteAndRecombine(N);
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Op);
return true;
}
/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
assert(AM != ISD::UNINDEXED);
SDValue BP = LD->getOperand(1);
SDValue Inc = LD->getOperand(2);
// Some backends use TargetConstants for load offsets, but don't expect
// TargetConstants in general ADD nodes. We can convert these constants into
// regular Constants (if the constant is not opaque).
assert((Inc.getOpcode() != ISD::TargetConstant ||
!cast<ConstantSDNode>(Inc)->isOpaque()) &&
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
unsigned Opc =
(AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
static inline ElementCount numVectorEltsOrZero(EVT T) {
return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
Val = ST->getValue();
EVT STType = Val.getValueType();
EVT STMemType = ST->getMemoryVT();
if (STType == STMemType)
return true;
if (isTypeLegal(STMemType))
return false; // fail.
if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
return true;
}
if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
STType.isInteger() && STMemType.isInteger()) {
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
return true;
}
if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
Val = DAG.getBitcast(STMemType, Val);
return true;
}
return false; // fail.
}
bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
EVT LDMemType = LD->getMemoryVT();
EVT LDType = LD->getValueType(0);
assert(Val.getValueType() == LDMemType &&
"Attempting to extend value of non-matching type");
if (LDType == LDMemType)
return true;
if (LDMemType.isInteger() && LDType.isInteger()) {
switch (LD->getExtensionType()) {
case ISD::NON_EXTLOAD:
Val = DAG.getBitcast(LDType, Val);
return true;
case ISD::EXTLOAD:
Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
return true;
case ISD::SEXTLOAD:
Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
return true;
case ISD::ZEXTLOAD:
Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
return true;
}
}
return false;
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
// TODO: Relax this restriction for unordered atomics (see D66309)
if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
EVT LDMemType = LD->getMemoryVT();
EVT STMemType = ST->getMemoryVT();
EVT STType = ST->getValue().getValueType();
// There are two cases to consider here:
// 1. The store is fixed width and the load is scalable. In this case we
// don't know at compile time if the store completely envelops the load
// so we abandon the optimisation.
// 2. The store is scalable and the load is fixed width. We could
// potentially support a limited number of cases here, but there has been
// no cost-benefit analysis to prove it's worth it.
bool LdStScalable = LDMemType.isScalableVector();
if (LdStScalable != STMemType.isScalableVector())
return SDValue();
// If we are dealing with scalable vectors on a big endian platform the
// calculation of offsets below becomes trickier, since we do not know at
// compile time the absolute size of the vector. Until we've done more
// analysis on big-endian platforms it seems better to bail out for now.
if (LdStScalable && DAG.getDataLayout().isBigEndian())
return SDValue();
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
return SDValue();
// Normalize for Endianness. After this Offset=0 will denote that the least
// significant bit in the loaded value maps to the least significant bit in
// the stored value. With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
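// For instance (hypothetical sizes): for a 4-byte store at address A and a
// 2-byte load at address A+2, the raw Offset is 2. On a big-endian target
// bytes A+2..A+3 hold the two least significant bytes of the stored value,
// so the normalization below turns Offset into (4 - 2) - 2 = 0.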
if (DAG.getDataLayout().isBigEndian())
Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
(int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
8 -
Offset;
// Check that the stored value covers all bits that are loaded.
bool STCoversLD;
TypeSize LdMemSize = LDMemType.getSizeInBits();
TypeSize StMemSize = STMemType.getSizeInBits();
if (LdStScalable)
STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
else
STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
StMemSize.getFixedSize());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
// Cannot handle opaque target constants and we must respect the user's
// request not to split indexes from loads.
if (!canSplitIdx(LD))
return SDValue();
SDValue Idx = SplitIndexingFromLoad(LD);
SDValue Ops[] = {Val, Idx, Chain};
return CombineTo(LD, Ops, 3);
}
return CombineTo(LD, Val, Chain);
};
if (!STCoversLD)
return SDValue();
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
if (LDType.getSizeInBits() == LdMemSize)
return ReplaceLd(LD, ST->getValue(), Chain);
// Can we model the truncate and extension with an and mask?
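// For example (hypothetical types): an i8 truncating store of an i32 value
// followed by a zero- or any-extending i8 load of the same address can be
// forwarded as (stored i32 value & 0xFF); sign-extending loads are excluded
// below because a mask cannot reproduce the sign bits.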
if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
// Mask to size of LDMemType
auto Mask =
DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
StMemSize.getFixedSize()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
}
}
// TODO: Deal with nonzero offset.
if (LD->getBasePtr().isUndef() || Offset != 0)
return SDValue();
// Model necessary truncations / extensions.
SDValue Val;
// Truncate Value To Stored Memory Size.
do {
if (!getTruncatedStoreValue(ST, Val))
continue;
if (!isTypeLegal(LDMemType))
continue;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else
continue;
}
if (!extendLoadedValueToExtension(LD, Val))
continue;
return ReplaceLd(LD, Val, Chain);
} while (false);
// On failure, cleanup dead nodes we may have created.
if (Val->use_empty())
deleteAndRecombine(Val.getNode());
return SDValue();
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
// TODO: Allow this for unordered atomics (see D66309)
if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
// If this load has an opaque TargetConstant offset, then we cannot split
// the indexing into an add/sub directly (that TargetConstant may not be
// valid for a different type of node, and we cannot convert an opaque
// target constant into a regular constant).
bool CanSplitIdx = canSplitIdx(LD);
if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
SDValue Index;
if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
Index = SplitIndexingFromLoad(LD);
// Try to fold the base pointer arithmetic into subsequent loads and
// stores.
AddUsersToWorklist(N);
} else
Index = DAG.getUNDEF(N->getValueType(1));
LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
// If this load is directly stored, replace the load value with the stored
// value.
if (auto V = ForwardStoreValueToDirectLoad(LD))
return V;
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > LD->getAlign() &&
isAligned(*Alignment, LD->getSrcValueOffset())) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
// NewLoad will always be N as we are only refining the alignment
assert(NewLoad.getNode() == N);
(void)NewLoad;
}
}
}
if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(LD, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
SDValue ReplLoad;
// Replace the chain to avoid the dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
BetterChain, Ptr, LD->getMemOperand());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
LD->getValueType(0),
BetterChain, Ptr, LD->getMemoryVT(),
LD->getMemOperand());
}
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));
// Replace uses with load result and token factor
return CombineTo(N, ReplLoad.getValue(0), Token);
}
}
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// Try to slice up N to more direct loads if the slices are mapped to
// different register banks or pairing can take place.
if (SliceUpLoad(N))
return SDValue(N, 0);
return SDValue();
}
namespace {
/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
/// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize = false;
/// Various cost.
unsigned Loads = 0;
unsigned Truncates = 0;
unsigned CrossRegisterBanksCopies = 0;
unsigned ZExts = 0;
unsigned Shift = 0;
explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
/// Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
if (TruncType != LoadedType &&
!LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
ZExts = 1;
}
/// Account for slicing gain in the current cost.
/// Slicing provides a few gains, like removing a shift or a
/// truncate. This method allows growing the cost of the original
/// load with the gain from this slice.
void addSliceGain(const LoadedSlice &LS) {
// Each slice saves a truncate.
const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
LS.Inst->getValueType(0)))
++Truncates;
// If there is a shift amount, this slice gets rid of it.
if (LS.Shift)
++Shift;
// If this slice can merge a cross register bank copy, account for it.
if (LS.canMergeExpensiveCrossRegisterBankCopy())
++CrossRegisterBanksCopies;
}
Cost &operator+=(const Cost &RHS) {
Loads += RHS.Loads;
Truncates += RHS.Truncates;
CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
ZExts += RHS.ZExts;
Shift += RHS.Shift;
return *this;
}
bool operator==(const Cost &RHS) const {
return Loads == RHS.Loads && Truncates == RHS.Truncates &&
CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
ZExts == RHS.ZExts && Shift == RHS.Shift;
}
bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
bool operator<(const Cost &RHS) const {
// Assume cross register banks copies are as expensive as loads.
// FIXME: Do we want some more target hooks?
unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
// Unless we are optimizing for code size, consider the
// expensive operation first.
if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
return ExpensiveOpsLHS < ExpensiveOpsRHS;
return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
(RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
}
bool operator>(const Cost &RHS) const { return RHS < *this; }
bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
};
// The last instruction that represents the slice. This should be a
// truncate instruction.
SDNode *Inst;
// The original load instruction.
LoadSDNode *Origin;
// The right shift amount in bits from the original load.
unsigned Shift;
// The DAG from which Origin came.
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
/// Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth bits wide and has used bits set to 1 and
/// unused bits set to 0.
APInt getUsedBits() const {
// Reproduce the trunc(lshr) sequence:
// - Start from the truncated value.
// - Zero extend to the desired bit width.
// - Shift left.
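//
// For example (hypothetical slice): truncating a 32-bit load to i8 after a
// shift of 16 gives UsedBits = 0x00FF0000, i.e. only the third least
// significant byte of the original value is used.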
assert(Origin && "No original load to compare against.");
unsigned BitWidth = Origin->getValueSizeInBits(0);
assert(Inst && "This slice is not bound to an instruction");
assert(Inst->getValueSizeInBits(0) <= BitWidth &&
"Extracted slice is bigger than the whole type!");
APInt UsedBits(Inst->getValueSizeInBits(0), 0);
UsedBits.setAllBits();
UsedBits = UsedBits.zext(BitWidth);
UsedBits <<= Shift;
return UsedBits;
}
/// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
unsigned SliceSize = getUsedBits().countPopulation();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
/// Get the type that will be loaded for this slice.
/// Note: This may not be the final type for the slice.
EVT getLoadedType() const {
assert(DAG && "Missing context");
LLVMContext &Ctxt = *DAG->getContext();
return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
}
/// Get the alignment of the load used for this slice.
Align getAlign() const {
Align Alignment = Origin->getAlign();
uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
return Alignment;
}
/// Check if this slice can be rewritten with legal operations.
bool isLegal() const {
// An invalid slice is not legal.
if (!Origin || !Inst || !DAG)
return false;
// Offsets are only used for indexed loads; we do not handle that.
if (!Origin->getOffset().isUndef())
return false;
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
// Check that the type is legal.
EVT SliceType = getLoadedType();
if (!TLI.isTypeLegal(SliceType))
return false;
// Check that the load is legal for this type.
if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
return false;
// Check that the offset can be computed.
// 1. Check its type.
EVT PtrType = Origin->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
// 2. Check that it fits in the immediate.
if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
return false;
// 3. Check that the computation is legal.
if (!TLI.isOperationLegal(ISD::ADD, PtrType))
return false;
// Check that the zext is legal if it needs one.
EVT TruncateType = Inst->getValueType(0);
if (TruncateType != SliceType &&
!TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
return false;
return true;
}
/// Get the offset in bytes of this slice in the original chunk of
/// bits.
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
bool IsBigEndian = DAG->getDataLayout().isBigEndian();
assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
uint64_t Offset = Shift / 8;
unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
"The size of the original loaded type is not a multiple of a"
" byte.");
// If Offset is bigger than TySizeInBytes, it means we are loading all
// zeros. This should have been optimized before in the process.
assert(TySizeInBytes > Offset &&
"Invalid shift amount for given loaded size");
if (IsBigEndian)
Offset = TySizeInBytes - Offset - getLoadedSize();
return Offset;
}
/// Generate the sequence of instructions to load the slice
/// represented by this object and redirect the uses of this slice to
/// this new sequence of instructions.
/// \pre this->Inst && this->Origin are valid Instructions and this
/// object passed the legal check: LoadedSlice::isLegal returned true.
/// \return The last instruction of the sequence used to load the slice.
SDValue loadSlice() const {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
// Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
SDLoc DL(Origin);
BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
EVT SliceType = getLoadedType();
// Create the load for the slice.
SDValue LastInst =
DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
Origin->getMemOperand()->getFlags());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
if (SliceType != FinalType)
LastInst =
DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
return LastInst;
}
/// Check if this slice can be merged with an expensive cross register
/// bank copy. E.g.,
/// i = load i32
/// f = bitcast i32 i to float
bool canMergeExpensiveCrossRegisterBankCopy() const {
if (!Inst || !Inst->hasOneUse())
return false;
SDNode *Use = *Inst->use_begin();
if (Use->getOpcode() != ISD::BITCAST)
return false;
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
const TargetRegisterClass *ResRC =
TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
const TargetRegisterClass *ArgRC =
TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
Use->getOperand(0)->isDivergent());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// At this point, we know that we perform a cross-register-bank copy.
// Check if it is expensive.
const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
// Assume bitcasts are cheap, unless both register classes do not
// explicitly share a common sub class.
if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
return false;
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
ResVT.getTypeForEVT(*DAG->getContext()));
if (RequiredAlignment > getAlign())
return false;
// 2. Check that the load is a legal operation for that type.
if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
// 3. Check that we do not have a zext in the way.
if (Inst->getValueType(0) != getLoadedType())
return false;
return true;
}
};
} // end anonymous namespace
/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
if (UsedBits.isAllOnesValue())
return true;
// Get rid of the unused bits on the right.
APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
// Get rid of the unused bits on the left.
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
return NarrowedUsedBits.isAllOnesValue();
}
/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
const LoadedSlice &Second) {
assert(First.Origin == Second.Origin && First.Origin &&
"Unable to match different memory origins.");
APInt UsedBits = First.getUsedBits();
assert((UsedBits & Second.getUsedBits()) == 0 &&
"Slices are not supposed to overlap.");
UsedBits |= Second.getUsedBits();
return areUsedBitsDense(UsedBits);
}
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
LoadedSlice::Cost &GlobalLSCost) {
unsigned NumberOfSlices = LoadedSlices.size();
// If there are fewer than 2 elements, no pairing is possible.
if (NumberOfSlices < 2)
return;
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. second) potential candidate
// to be placed in a paired load.
const LoadedSlice *First = nullptr;
const LoadedSlice *Second = nullptr;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
Second = &LoadedSlices[CurrSlice];
// If First is NULL, it means we start a new pair.
// Get to the next slice.
if (!First)
continue;
EVT LoadedType = First->getLoadedType();
// If the types of the slices are different, we cannot pair them.
if (LoadedType != Second->getLoadedType())
continue;
// Check if the target supplies paired loads for this type.
Align RequiredAlignment;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
if (First->getAlign() < RequiredAlignment)
continue;
// Check that both loads are next to each other in memory.
if (!areSlicesNextToEachOther(*First, *Second))
continue;
assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
--GlobalLSCost.Loads;
// Move to the next pair.
Second = nullptr;
}
}
/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
const APInt &UsedBits, bool ForCodeSize) {
unsigned NumberOfSlices = LoadedSlices.size();
if (StressLoadSlicing)
return NumberOfSlices > 1;
// Check (1).
if (NumberOfSlices != 2)
return false;
// Check (2).
if (!areUsedBitsDense(UsedBits))
return false;
// Check (3).
LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
// The original code has one big load.
OrigCost.Loads = 1;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
const LoadedSlice &LS = LoadedSlices[CurrSlice];
// Accumulate the cost of all the slices.
LoadedSlice::Cost SliceCost(LS, ForCodeSize);
GlobalSlicingCost += SliceCost;
// Account, as a cost of the original configuration, for the gain obtained
// with the current slice.
OrigCost.addSliceGain(LS);
}
// If the target supports paired load, adjust the cost accordingly.
adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
return OrigCost > GlobalSlicingCost;
}
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
// The algorithm to split up a load of a scalable vector into individual
// elements currently requires knowing the length of the loaded type,
// so will need adjusting to work on scalable vectors.
if (LD->getValueType(0).isScalableVector())
return false;
// Keep track of already used bits to detect overlapping values.
// In that case, we will just abort the transformation.
APInt UsedBits(LD->getValueSizeInBits(0), 0);
SmallVector<LoadedSlice, 4> LoadedSlices;
// Check if this load is used as several smaller chunks of bits.
// Basically, look for uses in trunc or trunc(lshr) and record a new chain
// of computation for each trunc.
for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
UI != UIEnd; ++UI) {
// Skip the uses of the chain.
if (UI.getUse().getResNo() != 0)
continue;
SDNode *User = *UI;
unsigned Shift = 0;
// Check if this is a trunc(lshr).
if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
isa<ConstantSDNode>(User->getOperand(1))) {
Shift = User->getConstantOperandVal(1);
User = *User->use_begin();
}
// At this point, User is a truncate if we encountered either trunc or
// trunc(lshr).
if (User->getOpcode() != ISD::TRUNCATE)
return false;
// The width of the type must be a power of 2 and at least 8 bits.
// Otherwise the load cannot be represented in LLVM IR.
// Moreover, if the shift amount is not a multiple of 8 bits, the slice
// would straddle byte boundaries. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
return false;
// Build the slice for this chain of computations.
LoadedSlice LS(User, LD, Shift, &DAG);
APInt CurrentUsedBits = LS.getUsedBits();
// Check if this slice overlaps with another.
if ((CurrentUsedBits & UsedBits) != 0)
return false;
// Update the bits used globally.
UsedBits |= CurrentUsedBits;
// Check if the new slice would be legal.
if (!LS.isLegal())
return false;
// Record the slice.
LoadedSlices.push_back(LS);
}
// Abort slicing if it does not seem to be profitable.
if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
return false;
++SlicedLoads;
// Rewrite each chain to use an independent load.
// By construction, each chain can be represented by a unique load.
// Prepare the argument for the new token factor for all the slices.
SmallVector<SDValue, 8> ArgChains;
for (const LoadedSlice &LS : LoadedSlices) {
SDValue SliceInst = LS.loadSlice();
CombineTo(LS.Inst, SliceInst, true);
if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
ArgChains.push_back(SliceInst.getValue(1));
}
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddToWorklist(Chain.getNode());
return true;
}
/// Check to see if V is (and load (ptr), imm), where the load has
/// specific bytes cleared out. If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
V.getValueType() != MVT::i64)
return Result;
// Check the constant mask. Invert it so that the bits being masked out are
// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
// follow the sign bit for uniformity.
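// For example (hypothetical mask): for an i32 load masked with 0xFFFF00FF,
// NotMask (sign-extended, then inverted) is 0x000000000000FF00, which is a
// single byte-aligned run of ones, so the result is (1, 1): one byte being
// masked out, at byte offset 1.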
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
unsigned NotMaskLZ = countLeadingZeros(NotMask);
if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
unsigned NotMaskTZ = countTrailingZeros(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
case 1:
case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
// For narrowing to be valid, it must be the case that the load is the
// memory operation immediately preceding the store.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() == ISD::TokenFactor &&
SDValue(LD, 1).hasOneUse()) {
// LD has only 1 chain use, so there are no indirect dependencies.
if (!LD->isOperandOf(Chain.getNode()))
return Result;
} else
return Result; // Fail.
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
}
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization (and the target doesn't explicitly think this is a bad idea).
MVT VT = MVT::getIntegerVT(NumBytes * 8);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DC->isTypeLegal(VT))
return SDValue();
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand()))
return SDValue();
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift) {
SDLoc DL(IVal);
IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
}
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
St->getPointerInfo().getWithOffset(StOffset),
St->getOriginalAlign());
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
EVT VT = Value.getValueType();
if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
return SDValue();
unsigned Opc = Value.getOpcode();
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
// load + 'or' + store sequence with a single (narrower) store, which makes
// the load dead.
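// For example (hypothetical pattern): for
//   store (or (and (load P), 0xFFFF00FF), Y), P
// where Y is known to be zero outside bits [8, 16), the whole sequence can be
// replaced by a single i8 store of (Y >> 8) at byte offset 1 from P (on a
// little-endian target), assuming an i8 store is legal there.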
if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return NewST;
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST,this))
return NewST;
}
if (!EnableReduceLoadOpStoreWidth)
return SDValue();
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
if (LD->getBasePtr() != Ptr ||
LD->getPointerInfo().getAddrSpace() !=
ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow the load / op / store to.
SDValue N1 = Value.getOperand(1);
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
Imm ^= APInt::getAllOnesValue(BitWidth);
if (Imm == 0 || Imm.isAllOnesValue())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
// The narrowing should be profitable, the load/store operation should be
// legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
(NewVT.getStoreSizeInBits() != NewBW ||
!TLI.isOperationLegalOrCustom(Opc, NewVT) ||
!TLI.isNarrowingProfitable(VT, NewVT))) {
NewBW = NextPowerOf2(NewBW);
NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
}
if (NewBW >= BitWidth)
return SDValue();
// If the lsb that changed does not start at a NewBW-bit boundary,
// start at the previous one.
if (ShAmt % NewBW)
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
std::min(BitWidth, ShAmt + NewBW));
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
NewImm ^= APInt::getAllOnesValue(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
SDValue NewPtr =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, SDLoc(Value),
NewVT));
SDValue NewST =
DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
AddToWorklist(NewPtr.getNode());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewVal.getNode());
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
}
return SDValue();
}
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
VT != ST->getMemoryVT() ||
LD->isNonTemporal() ||
ST->isNonTemporal() ||
LD->getPointerInfo().getAddrSpace() != 0 ||
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
TypeSize VTSize = VT.getSizeInBits();
// We don't know the size of scalable types at compile time so we cannot
// create an integer of the equivalent size.
if (VTSize.isScalable())
return SDValue();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
return SDValue();
Align LDAlign = LD->getAlign();
Align STAlign = ST->getAlign();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(), LDAlign);
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
return SDValue();
}
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
// To illustrate:
// (A + c1) * c3
// (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &AddNode,
SDValue &ConstNode) {
APInt Val;
// If the add only has one use, this would be OK to do.
if (AddNode.getNode()->hasOneUse())
return true;
// Walk all the users of the constant with which we're multiplying.
for (SDNode *Use : ConstNode->uses()) {
if (Use == MulNode) // This use is the one we're on right now. Skip it.
continue;
if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
SDNode *OtherOp;
SDNode *MulVar = AddNode.getOperand(0).getNode();
// OtherOp is what we're multiplying against the constant.
if (Use->getOperand(0) == ConstNode)
OtherOp = Use->getOperand(1).getNode();
else
OtherOp = Use->getOperand(0).getNode();
// Check to see if multiply is with the same operand of our "add".
//
// ConstNode = CONST
// Use = ConstNode * A <-- visiting Use. OtherOp is A.
// ...
// AddNode = (A + c1) <-- MulVar is A.
// = AddNode * ConstNode <-- current visiting instruction.
//
// If we make this transformation, we will have a common
// multiply (ConstNode * A) that we can save.
if (OtherOp == MulVar)
return true;
// Now check to see if a future expansion will give us a common
// multiply.
//
// ConstNode = CONST
// AddNode = (A + c1)
// ... = AddNode * ConstNode <-- current visiting instruction.
// ...
// OtherOp = (A + c2)
// Use = OtherOp * ConstNode <-- visiting Use.
//
// If we make this transformation, we will have a common
// multiply (CONST * A) after we also do the same transformation
// to the "Use" instruction.
if (OtherOp->getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
OtherOp->getOperand(0).getNode() == MulVar)
return true;
}
}
// Didn't find a case where this would be profitable.
return false;
}
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
SmallPtrSet<const SDNode *, 8> Visited;
SDLoc StoreDL(StoreNodes[0].MemNode);
for (unsigned i = 0; i < NumStores; ++i) {
Visited.insert(StoreNodes[i].MemNode);
}
// don't include nodes that are children or repeated nodes.
for (unsigned i = 0; i < NumStores; ++i) {
if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
assert(Chains.size() > 0 && "Chain should have generated a chain");
return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
if (NumStores < 2)
return false;
assert((!UseTrunc || !UseVector) &&
"This optimization cannot emit a vector truncating store");
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
// Get the type for the merged vector store.
StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
} else
StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
SDValue StoredVal;
if (UseVector) {
if (IsConstantSrc) {
SmallVector<SDValue, 8> BuildVector;
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
// If constant is of the wrong type, convert it now.
if (MemVT != Val.getValueType()) {
Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
EVT IntMemVT =
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
if (isa<ConstantFPSDNode>(Val)) {
// Not clear how to truncate FP values.
return false;
} else if (auto *C = dyn_cast<ConstantSDNode>(Val))
Val = DAG.getConstant(C->getAPIntValue()
.zextOrTrunc(Val.getValueSizeInBits())
.zextOrTrunc(ElementSizeBits),
SDLoc(C), IntMemVT);
}
// Bitcast the value so it has the correct memory type (MemVT).
Val = DAG.getBitcast(MemVT, Val);
}
BuildVector.push_back(Val);
}
StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, StoreTy, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcasts(St->getValue());
// All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
// type MemVT. If the underlying value is not the correct
// type, but it is an extraction of an appropriate vector we
// can recast Val to be of the correct type. This may require
// converting between EXTRACT_VECTOR_ELT and
// EXTRACT_SUBVECTOR.
if ((MemVT != Val.getValueType()) &&
(Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
EVT MemVTScalarTy = MemVT.getScalarType();
// We may need to add a bitcast here to get types to line up.
if (MemVTScalarTy != Val.getValueType().getScalarType()) {
Val = DAG.getBitcast(MemVT, Val);
} else {
unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
: ISD::EXTRACT_VECTOR_ELT;
SDValue Vec = Val.getOperand(0);
SDValue Idx = Val.getOperand(1);
Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
}
}
Ops.push_back(Val);
}
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, StoreTy, Ops);
}
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = DAG.getDataLayout().isLittleEndian();
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
Val = peekThroughBitcasts(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
.zextOrTrunc(ElementSizeBits)
.zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
StoreInt |= C->getValueAPF()
.bitcastToAPInt()
.zextOrTrunc(ElementSizeBits)
.zextOrTrunc(SizeInBits);
// If fp truncation is necessary give up for now.
if (MemVT.getSizeInBits() != ElementSizeBits)
return false;
} else {
llvm_unreachable("Invalid constant element type");
}
}
// Create the new Load and Store operations.
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
// Make sure we use a truncating store if that is necessary for legality.
SDValue NewStore;
if (!UseTrunc) {
NewStore =
DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstInChain->getAlign());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
SDValue ExtendedStoreVal =
DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Replace all merged stores with the new store.
for (unsigned i = 0; i < NumStores; ++i)
CombineTo(StoreNodes[i].MemNode, NewStore);
AddToWorklist(NewChain.getNode());
return true;
}
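// Collect into StoreNodes every store that writes the same kind of value
// (constant, load, or extracted vector element) as St and whose address
// shares St's base pointer, recording each candidate's byte offset from that
// base. RootNode is set to the chain ancestor from which the search was done.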
void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer. We must have a base and an offset. Do not handle stores to undef
// base pointers.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
return;
SDValue Val = peekThroughBitcasts(St->getValue());
StoreSource StoreSrc = getStoreSource(Val);
assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
// Match on loadbaseptr if relevant.
EVT MemVT = St->getMemoryVT();
BaseIndexOffset LBasePtr;
EVT LoadVT;
if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
// Load and store should be the same type.
if (MemVT != LoadVT)
return;
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Ld->isSimple() || Ld->isIndexed())
return;
}
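// Returns true if Other stores the same kind of value as St to an address
// that differs from St's base pointer only by a constant offset, which is
// returned in Offset.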
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
return false;
SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
switch (StoreSrc) {
case StoreSource::Load: {
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match.
auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
if (!OtherLd)
return false;
BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!OtherLd->isSimple() || OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
break;
}
case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!isIntOrFPConstant(OtherBC))
return false;
break;
case StoreSource::Extract:
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
if (!MemVT.bitsEq(OtherBC.getValueType()))
return false;
if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
break;
default:
llvm_unreachable("Unhandled store source for merging");
}
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
// Check whether this StoreNode/RootNode pair has already bailed out of the
// dependence check more times than the allowed limit.
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
SDNode *RootNode) -> bool {
auto RootCount = StoreRootCountMap.find(StoreNode);
return RootCount != StoreRootCountMap.end() &&
RootCount->second.first == RootNode &&
RootCount->second.second > StoreMergeDependenceLimit;
};
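// Add the user at UseIter to StoreNodes if it is a chain use that matches the
// candidate criteria above and has not repeatedly failed the dependence check.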
auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
// This must be a chain use.
if (UseIter.getOperandNo() != 0)
return;
if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
!OverLimitInDependenceCheck(OtherStore, RootNode))
StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
}
};
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
// St is Store1, Store2, or Store3 where the root is not a load,
// which is always true for nonvolatile ops. TODO: Expand
// the search to find all valid candidates through multiple layers of loads.
//
// Root
// |-------|-------|
// Load Load Store3
// | |
// Store1 Store2
//
// FIXME: We should be able to climb and
// descend TokenFactors to find candidates as well.
RootNode = St->getChain().getNode();
unsigned NumNodesExplored = 0;
const unsigned MaxSearchNodes = 1024;
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
TryToAddCandidate(I2);
}
}
} else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
TryToAddCandidate(I);
}
}
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode) {
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores from which worklist nodes come, in a similar way to
// TokenFactor simplification.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
// RootNode is a predecessor to all candidates so we need not search
// past it. Add RootNode (peeking through TokenFactors). Do not count
// these towards the size check.
Worklist.push_back(RootNode);
while (!Worklist.empty()) {
auto N = Worklist.pop_back_val();
if (!Visited.insert(N).second)
continue; // Already present in Visited.
if (N->getOpcode() == ISD::TokenFactor) {
for (SDValue Op : N->ops())
Worklist.push_back(Op.getNode());
}
}
// Don't count pruning nodes towards max.
unsigned int Max = 1024 + Visited.size();
// Search Ops of store candidates.
for (unsigned i = 0; i < NumStores; ++i) {
SDNode *N = StoreNodes[i].MemNode;
// Of the 4 Store Operands:
// * Chain (Op 0) -> We have already considered these
// in candidate selection and can be
// safely ignored
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
// * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
// but aren't necessarily from the same base node, so
// cycles are possible (e.g. via indexed store).
// * (Op 3) -> Represents the pre- or post-indexing offset (or undef for
// non-indexed stores). Not constant on all targets (e.g. ARM)
// and so can participate in a cycle.
for (unsigned j = 1; j < N->getNumOperands(); ++j)
Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
Max)) {
// If the search bails out, record the StoreNode and RootNode in the
// StoreRootCountMap. If we have seen the pair many times over a limit,
// we won't add the StoreNode into the StoreNodes set again.
if (Visited.size() >= Max) {
auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
if (RootCount.first == RootNode)
RootCount.second++;
else
RootCount = {RootNode, 1};
}
return false;
}
return true;
}
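// Trim StoreNodes until its leading entries form a run of consecutive stores
// (each offset exactly ElementSizeBytes past the previous one) and return the
// length of that run, or 0 if no such run of at least two stores exists. For
// example, with ElementSizeBytes == 4 and offsets {0, 4, 8, 20}, the first
// three stores are consecutive and 3 is returned.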
unsigned
DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
int64_t ElementSizeBytes) const {
while (true) {
// Find a store past the width of the first store.
size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
StoreNodes[StartIdx + 1].OffsetFromBase)
++StartIdx;
// Bail if we don't have enough candidates to merge.
if (StartIdx + 1 >= StoreNodes.size())
return 0;
// Trim stores that overlapped with the first store.
if (StartIdx)
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
// Scan the memory operations on the chain and find the first
// non-consecutive store memory address.
unsigned NumConsecutiveStores = 1;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
// Check that the addresses are consecutive starting from the second
// element in the list of stores.
for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
NumConsecutiveStores = i + 1;
}
if (NumConsecutiveStores > 1)
return NumConsecutiveStores;
// There are no consecutive stores at the start of the list.
// Remove the first store and try again.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
}
}
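// Try to merge runs of consecutive constant stores from StoreNodes into a
// single wide integer store (possibly truncating) or, when profitable and
// allowed, a single vector store.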
bool DAGCombiner::tryStoreMergeOfConstants(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
EVT MemVT, SDNode *RootNode, bool AllowVectors) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Store the constants into memory as one consecutive store.
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
IsElementZero = C->isNullValue();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
}
NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
// Or check whether a truncstore is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
}
}
// We only use vectors if the constant is known to be zero or the
// target allows it and the function is not marked with the
// noimplicitfloat attribute.
if ((!NonZero ||
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
AllowVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
}
bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
bool UseTrunc = LastIntegerTrunc && !UseVector;
// Check if we found a legal integer type that creates a meaningful
// merge.
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment has
// improved or we've dropped a non-zero value. Drop as many
// candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
/*IsConstantSrc*/ true,
UseVector, UseTrunc);
// Remove merged stores for next iteration.
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
}
return MadeChange;
}
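// Try to merge runs of consecutive stores of extracted vector elements or
// subvectors from StoreNodes into a single wide vector store.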
bool DAGCombiner::tryStoreMergeOfExtracts(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
EVT MemVT, SDNode *RootNode) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Loop on Consecutive Stores on success.
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast = false;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
// Check if we found a legal vector type that creates a meaningful
// merge.
if (NumStoresToMerge < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment has
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
continue;
}
MadeChange |= mergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
/*UseVector*/ true, /*UseTrunc*/ false);
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
NumConsecutiveStores -= NumStoresToMerge;
}
return MadeChange;
}
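// Try to replace runs of consecutive stores whose values are loads from
// consecutive addresses with a single wide load feeding a single wide store
// (using either a vector or an integer type, depending on which covers more
// stores).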
bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumConsecutiveStores, EVT MemVT,
SDNode *RootNode, bool AllowVectors,
bool IsNonTemporalStore,
bool IsNonTemporalLoad) {
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
int64_t ElementSizeBytes = MemVT.getStoreSize();
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
bool MadeChange = false;
// Look for load nodes which are used by the stored values.
SmallVector<MemOpLink, 8> LoadNodes;
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcasts(St->getValue());
LoadSDNode *Ld = cast<LoadSDNode>(Val);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
// The base ptr must be the same.
if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
break;
} else {
// Check that all other base pointers are the same as this one.
LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
Align RequiredAlignment;
bool NeedRotate = false;
if (LoadNodes.size() == 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
// If the loads are reversed, see if we can rotate the halves into place.
int64_t Offset0 = LoadNodes[0].OffsetFromBase;
int64_t Offset1 = LoadNodes[1].OffsetFromBase;
EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
if (Offset0 - Offset1 == ElementSizeBytes &&
(hasOperation(ISD::ROTL, PairVT) ||
hasOperation(ISD::ROTR, PairVT))) {
std::swap(LoadNodes[0], LoadNodes[1]);
NeedRotate = true;
}
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
Align FirstStoreAlign = FirstInChain->getAlign();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
// Scan the memory operations on the chain and find the first
// non-consecutive load memory address. These variables hold the index in
// the store node array.
unsigned LastConsecutiveLoad = 1;
// This variable refers to the size and not index in the array.
unsigned LastLegalVectorType = 1;
unsigned LastLegalIntegerType = 1;
bool isDereferenceable = true;
bool DoIntegerTruncate = false;
int64_t StartAddress = LoadNodes[0].OffsetFromBase;
SDValue LoadChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != LoadChain)
break;
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
LastConsecutiveLoad = i;
if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
isDereferenceable = false;
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
bool IsFastSt = false;
bool IsFastLd = false;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
// Find a legal type for the integer store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
// Or check whether a truncstore and extload is legal.
} else if (TLI.getTypeAction(Context, StoreTy) ==
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
}
}
}
// Only use vector types if the vector type is larger than the integer
// type. If they are the same, use integers.
bool UseVectorTy =
LastLegalVectorType > LastLegalIntegerType && AllowVectors;
unsigned LastLegalType =
std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables refer to an index in the
// array while NumElem refers to the number of elements.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
Align FirstLoadAlign = FirstLoad->getAlign();
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
// shape. While there is no mergeable sequence from the
// beginning, one may start later in the sequence. The only
// reason a merge of size N could have failed where another of
// the same size would not have is if the alignment of either
// the load or store has improved. Drop as many candidates as we
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
(LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
(StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
NumConsecutiveStores -= NumSkip;
continue;
}
// Check that we can merge these candidates without causing a cycle.
if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
RootNode)) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
continue;
}
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
// Find a legal type for the vector store.
unsigned Elts = NumElem * NumMemElts;
JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags LdMMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
if (IsNonTemporalLoad)
LdMMOFlags |= MachineMemOperand::MONonTemporal;
MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
? MachineMemOperand::MONonTemporal
: MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
SDValue StoreOp = NewLoad;
if (NeedRotate) {
unsigned LoadWidth = ElementSizeBytes * 8 * 2;
assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
"Unexpected type for rotate-able load pair");
SDValue RotAmt =
DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
// Target can convert to the identical ROTR if it does not have ROTL.
StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
}
NewStore = DAG.getStore(
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), JointMemOpVT,
FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
SDValue(NewLoad.getNode(), 1));
}
// Replace all stores with the new store. Recursively remove corresponding
// values if they are no longer used.
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
if (Val.getNode()->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
MadeChange = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
NumConsecutiveStores -= NumElem;
}
return MadeChange;
}
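// Top-level driver for store merging: gather candidate stores that share St's
// base pointer, sort them by offset, and repeatedly try to merge the leading
// run of consecutive stores according to the kind of value being stored
// (constant, extracted vector element, or load).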
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
return false;
// TODO: Extend this function to merge stores of scalable vectors.
// (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
// store since we know <vscale x 16 x i8> is exactly twice as large as
// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
EVT MemVT = St->getMemoryVT();
if (MemVT.isScalableVector())
return false;
if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
// This function cannot currently deal with non-byte-sized memory sizes.
int64_t ElementSizeBytes = MemVT.getStoreSize();
if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
return false;
// Do not bother looking at stored values that are not constants, loads, or
// extracted vector elements.
SDValue StoredVal = peekThroughBitcasts(St->getValue());
const StoreSource StoreSrc = getStoreSource(StoredVal);
if (StoreSrc == StoreSource::Unknown)
return false;
SmallVector<MemOpLink, 8> StoreNodes;
SDNode *RootNode;
// Find potential store merge candidates by searching through the chain sub-DAG.
getStoreMergeCandidates(St, StoreNodes, RootNode);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
// Sort the memory operands according to their distance from the
// base pointer.
llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase;
});
bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
bool IsNonTemporalStore = St->isNonTemporal();
bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
cast<LoadSDNode>(StoredVal)->isNonTemporal();
// Store merging attempts to merge the lowest-addressed stores first. This
// generally works out, since when a merge succeeds the remaining stores are
// checked again after the first collection of stores has been merged.
// However, in the case that a non-mergeable store is found first, e.g.,
// {p[-2], p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
// mergeable cases. To prevent this, we prune such stores from the
// front of StoreNodes here.
bool MadeChange = false;
while (StoreNodes.size() > 1) {
unsigned NumConsecutiveStores =
getConsecutiveStores(StoreNodes, ElementSizeBytes);
// There are no more stores in the list to examine.
if (NumConsecutiveStores == 0)
return MadeChange;
// We have at least 2 consecutive stores. Try to merge them.
assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
switch (StoreSrc) {
case StoreSource::Constant:
MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
MemVT, RootNode, AllowVectors);
break;
case StoreSource::Extract:
MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
MemVT, RootNode);
break;
case StoreSource::Load:
MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
MemVT, RootNode, AllowVectors,
IsNonTemporalStore, IsNonTemporalLoad);
break;
default:
llvm_unreachable("Unhandled store source type");
}
}
return MadeChange;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
SDLoc SL(ST);
SDValue ReplStore;
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
ST->getBasePtr(), ST->getMemoryVT(),
ST->getMemOperand());
} else {
ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
ST->getMemOperand());
}
// Create token to keep both nodes around.
SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
MVT::Other, ST->getChain(), ReplStore);
// Make sure the new and old chains are cleaned up.
AddToWorklist(Token.getNode());
// Don't add users to work list.
return CombineTo(ST, Token, false);
}
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue Value = ST->getValue();
if (Value.getOpcode() == ISD::TargetConstantFP)
return SDValue();
if (!ISD::isNormalStore(ST))
return SDValue();
SDLoc DL(ST);
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
// NOTE: If the original store is volatile, this transform must not increase
// the number of stores. For example, on x86-32 an f64 can be stored in one
// processor operation but an i64 (which is not legal) requires two. So the
// transform should not be done in this case.
SDValue Tmp;
switch (CFP->getSimpleValueType(0).SimpleTy) {
default:
llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
return SDValue();
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), SDLoc(CFP),
MVT::i32);
return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
}
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, DL, Tmp,
Ptr, ST->getMemOperand());
}
if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
return SDValue();
}
}
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
EVT SVT = Value.getOperand(0).getValueType();
// If the store is volatile, we only want to change the store type if the
// resulting store is legal. Otherwise we might increase the number of
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
// TODO: May be able to relax for unordered atomics (see D66309)
if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemOperand());
}
}
// Turn 'store undef, Ptr' -> nothing.
if (Value.isUndef() && ST->isUnindexed())
return Chain;
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > ST->getAlign() &&
isAligned(*Alignment, ST->getSrcValueOffset())) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
ST->getMemoryVT(), *Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
// NewStore will always be N as we are only refining the alignment
assert(NewStore.getNode() == N);
(void)NewStore;
}
}
}
// Try transforming a pair floating point load / store ops to integer
// load / store ops.
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
// Try transforming several stores into STORE (BSWAP).
if (SDValue Store = mergeTruncStores(ST))
return Store;
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
// replaceStoreChain uses CombineTo, which handled all of the worklist
// manipulation. Return the original node to not do anything else.
return SDValue(ST, 0);
}
Chain = ST->getChain();
}
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
AddToWorklist(Value.getNode());
if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted). SimplifyDemandedBits will add Value's
// node back to the worklist if necessary, but we also need to re-visit
// the Store node itself.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
}
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
// TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
// The store is dead, remove it.
return Chain;
}
}
// TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
// same location, then the store is dead/noop.
return Chain;
}
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
// BaseIndexOffset and the code below require knowing the size
// of a vector, so bail out if MemoryVT is scalable.
!ST->getMemoryVT().isScalableVector() &&
!ST1->getMemoryVT().isScalableVector() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
// If the preceding store writes to a subset of the current store's
// location and no other node is chained to that preceding store, we can
// effectively drop it. Do not remove stores to undef as they may
// be used as data sinks.
if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
CombineTo(ST1, ST1->getChain());
return SDValue();
}
}
}
}
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND ||
Value.getOpcode() == ISD::TRUNCATE) &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
ST->getMemoryVT(), LegalOperations)) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
bool Changed = mergeConsecutiveStores(ST);
if (!Changed) break;
// Return N, as the merge only uses CombineTo and no worklist
// cleanup is necessary.
if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
}
}
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
//
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
if (SDValue NewSt = splitMergedValStore(ST))
return NewSt;
return ReduceLoadOpStoreWidth(N);
}
SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
if (!LifetimeEnd->hasOffset())
return SDValue();
const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
LifetimeEnd->getOffset(), false);
// We walk up the chains to find stores.
SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
if (!Chain.hasOneUse())
continue;
switch (Chain.getOpcode()) {
case ISD::TokenFactor:
for (unsigned Nops = Chain.getNumOperands(); Nops;)
Chains.push_back(Chain.getOperand(--Nops));
break;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
if (!isAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
// The bounds of a scalable store are not known until runtime, so this
// store cannot be elided.
if (StoreSize.isScalable())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
StoreSize.getFixedSize() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
CombineTo(ST, ST->getChain());
return SDValue(N, 0);
}
}
}
}
return SDValue();
}
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
/// (shl (zext I to i64), 32)), addr) -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8}, i32 store --> two i16 stores.
/// For pair of {i8, i8}, i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
/// ...
/// goo(std::make_pair(tmp, ftmp));
/// ...
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
// Can't change the number of memory accesses for a volatile store or break
// atomicity for an atomic one.
if (!ST->isSimple())
return SDValue();
SDValue Val = ST->getValue();
SDLoc DL(ST);
// Match OR operand.
if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
return SDValue();
// Match SHL operand and get Lower and Higher parts of Val.
SDValue Op1 = Val.getOperand(0);
SDValue Op2 = Val.getOperand(1);
SDValue Lo, Hi;
if (Op1.getOpcode() != ISD::SHL) {
std::swap(Op1, Op2);
if (Op1.getOpcode() != ISD::SHL)
return SDValue();
}
Lo = Op2;
Hi = Op1.getOperand(0);
if (!Op1.hasOneUse())
return SDValue();
// Match shift amount to HalfValBitSize.
unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
return SDValue();
// Lo and Hi are zero-extended from integers with size less than or equal to
// HalfValBitSize (e.g. i32 zero-extended to i64).
if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
!Lo.getOperand(0).getValueType().isScalarInteger() ||
Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
!Hi.getOperand(0).getValueType().isScalarInteger() ||
Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
return SDValue();
// Use the EVT of low and high parts before bitcast as the input
// of target query.
EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
? Lo.getOperand(0).getValueType()
: Lo.getValueType();
EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
? Hi.getOperand(0).getValueType()
: Hi.getValueType();
if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
return SDValue();
// Start to split store.
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
// Change the sizes of Lo and Hi's value types to HalfValBitSize.
EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
// Higher value store.
SDValue St1 = DAG.getStore(
St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
ST->getOriginalAlign(), MMOFlags, AAInfo);
return St1;
}
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
"Expected insert_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
// InsIndex)
// --> (vector_shuffle X, Y) and variations where shuffle operands may be
// CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
ArrayRef<int> Mask = SVN->getMask();
SDValue X = Vec.getOperand(0);
SDValue Y = Vec.getOperand(1);
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
SDValue InsertVal0 = InsertVal.getOperand(0);
int ElementOffset = -1;
// We explore the inputs of the shuffle in order to see if we find the
// source of the extract_vector_elt. If so, we can use it to modify the
// shuffle rather than perform an insert_vector_elt.
SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
ArgWorkList.emplace_back(Mask.size(), Y);
ArgWorkList.emplace_back(0, X);
while (!ArgWorkList.empty()) {
int ArgOffset;
SDValue ArgVal;
std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
if (ArgVal == InsertVal0) {
ElementOffset = ArgOffset;
break;
}
// Peek through concat_vector.
if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
int CurrentArgOffset =
ArgOffset + ArgVal.getValueType().getVectorNumElements();
int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
for (SDValue Op : reverse(ArgVal->ops())) {
CurrentArgOffset -= Step;
ArgWorkList.emplace_back(CurrentArgOffset, Op);
}
// Make sure we went through all the elements and did not screw up index
// computation.
assert(CurrentArgOffset == ArgOffset);
}
}
if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
Y, NewMask, DAG);
if (LegalShuffle)
return LegalShuffle;
}
}
// insert_vector_elt V, (bitcast X from vector type), IdxC -->
// bitcast(shuffle (bitcast V), (extended X), Mask)
// Note: We do not use an insert_subvector node because that requires a
// legal subvector type.
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
SDValue SubVec = InsertVal.getOperand(0);
SDValue DestVec = N->getOperand(0);
EVT SubVecVT = SubVec.getValueType();
EVT VT = DestVec.getValueType();
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
// If the source only has a single vector element, the cost of creating and
// adding it to a vector is likely to exceed the cost of an insert_vector_elt.
if (NumSrcElts == 1)
return SDValue();
unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
unsigned NumMaskVals = ExtendRatio * NumSrcElts;
// Step 1: Create a shuffle mask that implements this insert operation. The
// vector that we are inserting into will be operand 0 of the shuffle, so
// those elements are just 'i'. The inserted subvector is in the first
// positions of operand 1 of the shuffle. Example:
// insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
SmallVector<int, 16> Mask(NumMaskVals);
for (unsigned i = 0; i != NumMaskVals; ++i) {
if (i / NumSrcElts == InsIndex)
Mask[i] = (i % NumSrcElts) + NumMaskVals;
else
Mask[i] = i;
}
// Bail out if the target can not handle the shuffle we want to create.
EVT SubVecEltVT = SubVecVT.getVectorElementType();
EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
return SDValue();
// Step 2: Create a wide vector from the inserted source vector by appending
// undefined elements. This is the same size as our destination vector.
SDLoc DL(N);
SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
ConcatOps[0] = SubVec;
SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
// Step 3: Shuffle in the padded subvector.
SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
AddToWorklist(PaddedSubV.getNode());
AddToWorklist(DestVecBC.getNode());
AddToWorklist(Shuf.getNode());
return DAG.getBitcast(VT, Shuf);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
EVT VT = InVec.getValueType();
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
// Insert into out-of-bounds element is undefined.
if (IndexC && VT.isFixedLengthVector() &&
IndexC->getZExtValue() >= VT.getVectorNumElements())
return DAG.getUNDEF(VT);
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
if (!IndexC) {
// If this is a variable insert into an undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, InVal);
else {
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
return DAG.getBuildVector(VT, DL, Ops);
}
}
return SDValue();
}
if (VT.isScalableVector())
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
// Canonicalize insert_vector_elt dag nodes.
// Example:
// (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
// -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
//
// Do this only if the child insert_vector node has one use; also
// do this only if indices are both constants and Idx1 < Idx0.
if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
&& isa<ConstantSDNode>(InVec.getOperand(2))) {
unsigned OtherElt = InVec.getConstantOperandVal(2);
if (Elt < OtherElt) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
InVec.getOperand(0), InVal, EltNo);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
}
}
// If we can't generate a legal BUILD_VECTOR, exit
if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return SDValue();
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
SmallVector<SDValue, 8> Ops;
// Do not combine these two vectors if the output vector will not replace
// the input vector.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
} else if (InVec.isUndef()) {
Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
} else {
return SDValue();
}
assert(Ops.size() == NumElts && "Unexpected vector size");
// Insert the element
if (Elt < Ops.size()) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
return DAG.getBuildVector(VT, DL, Ops);
}
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
// If the vector element type is not a multiple of a byte then we are unable
// to correctly compute an address to load only the extracted element as a
// scalar.
if (!VecEltVT.isByteSized())
return SDValue();
Align Alignment = OriginalLoad->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Alignment ||
!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
ISD::NON_EXTLOAD : ISD::EXTLOAD;
if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
Alignment = NewAlign;
MachinePointerInfo MPI;
SDLoc DL(EVE);
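// With a constant index, the scalar lives at a fixed byte offset from the
// original base pointer. For example, extracting element 2 of a v4i32 load
// reads from base + (32 * 2) / 8 = base + 8 bytes.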
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
// Use ReplaceAllUsesOfValuesWith to do the replacement.
// Note that this replacement assumes that the extract is the only
// use of the load; that's okay because we don't want to perform this
// transformation in other cases anyway.
SDValue Load;
SDValue Chain;
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
Alignment, OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
Load = DAG.getLoad(
VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
else
Load = DAG.getBitcast(ResultVT, Load);
}
WorklistRemover DeadNodes(*this);
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Make sure to revisit this node to clean it up; it will usually be dead.
AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
/// Transform a vector binary operation into a scalar binary operation by moving
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
Vec.getNode()->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();
// Extracting an element of a vector constant is constant-folded, so this
// transform is just replacing a vector op with a scalar op while moving the
// extract.
SDValue Op0 = Vec.getOperand(0);
SDValue Op1 = Vec.getOperand(1);
if (isAnyConstantBuildVector(Op0, true) ||
isAnyConstantBuildVector(Op1, true)) {
// extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
// extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
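// For example:
//   extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3
// because the extract of the constant operand constant-folds to 3.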
SDLoc DL(ExtElt);
EVT VT = ExtElt->getValueType(0);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
}
return SDValue();
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue VecOp = N->getOperand(0);
SDValue Index = N->getOperand(1);
EVT ScalarVT = N->getValueType(0);
EVT VecVT = VecOp.getValueType();
if (VecOp.isUndef())
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
//
// This only really matters if the index is non-constant since other combines
// on the constant elements already work.
SDLoc DL(N);
if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
Index == VecOp.getOperand(2)) {
SDValue Elt = VecOp.getOperand(1);
return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
}
// (vextract (scalar_to_vector val, 0) -> val
if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Only 0'th element of SCALAR_TO_VECTOR is defined.
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
// extract_vector_elt of out-of-bounds element -> UNDEF
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && VecVT.isFixedLengthVector() &&
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
TLI.isTypeLegal(VecVT) &&
(VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
VecVT.isFixedLengthVector()) &&
"BUILD_VECTOR used for scalable vectors");
unsigned IndexVal =
VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
if (ScalarVT == InEltVT)
return Elt;
// TODO: It may be useful to truncate if free if the build_vector implicitly
// converts.
}
if (VecVT.isScalableVector())
return SDValue();
// All the code from this point onwards assumes fixed width vectors, but it's
// possible that some of the combinations could be made to work for scalable
// vectors too.
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
VecOp.hasOneUse()) {
// The vector index of the LSBs of the source depends on endianness.
bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned ExtractIndex = IndexC->getZExtValue();
// extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
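// On little-endian targets the low bits of the source sit in element 0; on
// big-endian targets they sit in the last element, so only that element can
// be rewritten as a plain truncate of the scalar source.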
SDValue BCSrc = VecOp.getOperand(0);
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
if (LegalTypes && BCSrc.getValueType().isInteger() &&
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
// trunc i64 X to i32
SDValue X = BCSrc.getOperand(0);
assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
"Extract element and scalar to vector can't change element type "
"from FP to integer.");
unsigned XBitWidth = X.getValueSizeInBits();
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
// An extract element return value type can be wider than its vector
// operand element type. In that case, the high bits are undefined, so
// it's possible that we may need to extend rather than truncate.
if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
assert(XBitWidth % VecEltBitWidth == 0 &&
"Scalar bitwidth must be a multiple of vector element bitwidth");
return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
}
}
}
if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
return BO;
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
// patterns. For example, on AVX, extracting an element from a wide vector
// without going through extract_subvector may not be matched by existing
// patterns. However, if we can find an underlying scalar value, then we can
// always use that.
if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
// Find the new index to extract from.
int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
return DAG.getUNDEF(ScalarVT);
// Select the right vector half to extract from.
SDValue SVInVec;
if (OrigElt < (int)NumElts) {
SVInVec = VecOp.getOperand(0);
} else {
SVInVec = VecOp.getOperand(1);
OrigElt -= NumElts;
}
if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue InOp = SVInVec.getOperand(OrigElt);
if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}
return InOp;
}
// FIXME: We should handle recursing on other vector shuffles and
// scalar_to_vector here as well.
if (!LegalOperations ||
// FIXME: Should really be just isOperationLegalOrCustom.
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
DAG.getVectorIdxConstant(OrigElt, DL));
}
}
// If only EXTRACT_VECTOR_ELT nodes use the source vector we can
// simplify it based on the (valid) extraction indices.
if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
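// Gather the union of all constant extract indices. For example, if a v4i32
// source is only used by extracts of elements 0 and 2, DemandedElts becomes
// 0b0101 and the unused lanes can be simplified away.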
APInt DemandedElts = APInt::getNullValue(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
DemandedElts.setBit(CstElt->getZExtValue());
}
if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
AddToWorklist(N);
return SDValue(N, 0);
}
}
// Everything under here is trying to match an extract of a loaded value.
// If the result of the load has to be truncated, then it's not necessarily
// profitable.
bool BCNumEltsChanged = false;
EVT ExtVT = VecVT.getVectorElementType();
EVT LVT = ExtVT;
if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
return SDValue();
if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
EVT BCVT = VecOp.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
if (NumElts != BCVT.getVectorNumElements())
BCNumEltsChanged = true;
VecOp = VecOp.getOperand(0);
ExtVT = BCVT.getVectorElementType();
}
// extract (vector load $addr), i --> load $addr + i * size
if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations || !IndexC)
return SDValue();
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
int Elt = IndexC->getZExtValue();
LoadSDNode *LN0 = nullptr;
if (ISD::isNormalLoad(VecOp.getNode())) {
LN0 = cast<LoadSDNode>(VecOp);
} else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
VecOp.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
}
if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
return SDValue();
// Select the input vector, guarding against out of range extract vector.
int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
if (VecOp.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!VecOp.hasOneUse())
return SDValue();
VecOp = VecOp.getOperand(0);
}
if (ISD::isNormalLoad(VecOp.getNode())) {
LN0 = cast<LoadSDNode>(VecOp);
Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
Index = DAG.getConstant(Elt, DL, Index.getValueType());
}
} else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
VecVT.getVectorElementType() == ScalarVT &&
(!LegalTypes ||
TLI.isTypeLegal(
VecOp.getOperand(0).getValueType().getVectorElementType()))) {
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
// -> extract_vector_elt a, 0
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
// -> extract_vector_elt a, 1
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
// -> extract_vector_elt b, 0
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
// -> extract_vector_elt b, 1
SDLoc SL(N);
EVT ConcatVT = VecOp.getOperand(0).getValueType();
unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
Index.getValueType());
SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
ConcatVT.getVectorElementType(),
ConcatOp, NewIdx);
return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
}
// Make sure we found a non-volatile load and the extractelement is
// the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
if (Elt == -1)
return DAG.getUNDEF(LVT);
return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}
// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// We perform this optimization post type-legalization because
// the type-legalizer often scalarizes integer-promoted vectors.
// Performing this optimization earlier may create bit-casts which
// will be type-legalized to complex code sequences.
// We perform this optimization only before the operation legalizer because we
// may introduce illegal operations.
if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
return SDValue();
unsigned NumInScalars = N->getNumOperands();
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// optimizations. We do not handle sign-extend because we can't fill the sign
// using shuffles.
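// For example, on a little-endian target:
//   (v2i32 build_vector (zext i16:a to i32), (zext i16:b to i32))
// can instead be built as
//   (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))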
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.isUndef()) continue;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
// Abort if the element is not an extension.
if (!ZeroExt && !AnyExt) {
SourceType = MVT::Other;
break;
}
// The input is a ZeroExt or AnyExt. Check the original type.
EVT InTy = In.getOperand(0).getValueType();
// Check that all of the widened source types are the same.
if (SourceType == MVT::Other)
// First time.
SourceType = InTy;
else if (InTy != SourceType) {
// Multiple incoming types. Abort.
SourceType = MVT::Other;
break;
}
// Check if all of the extends are ANY_EXTENDs.
AllAnyExt &= AnyExt;
}
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
if (!ValidTypes)
return SDValue();
// If we already have a splat buildvector, then don't fold it if it means
// introducing zeros.
if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
return SDValue();
bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
DAG.getConstant(0, DL, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
// Populate the new build_vector
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Cast = N->getOperand(i);
assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
Cast.getOpcode() == ISD::ZERO_EXTEND ||
Cast.isUndef()) && "Invalid cast opcode");
SDValue In;
if (Cast.isUndef())
In = DAG.getUNDEF(SourceType);
else
In = Cast->getOperand(0);
unsigned Index = isLE ? (i * ElemRatio) :
(i * ElemRatio + (ElemRatio - 1));
assert(Index < Ops.size() && "Invalid index");
Ops[Index] = In;
}
// The type of the new BUILD_VECTOR node.
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
"Invalid vector size");
// Check if the new vector type is legal.
if (!isTypeLegal(VecVT) ||
(!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
return SDValue();
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
// Bitcast to the desired type.
return DAG.getBitcast(VT, BV);
}
// Simplify (build_vec (trunc $1)
// (trunc (srl $1 half-width))
// (trunc (srl $1 (2 * half-width))) …)
// to (bitcast $1)
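// For example, on a little-endian target:
//   (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                       (trunc (srl x, 32)), (trunc (srl x, 48)))
// is equivalent to (v4i16 bitcast i64:x).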
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
// Only for little-endian targets.
if (!DAG.getDataLayout().isLittleEndian())
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT OutScalarTy = VT.getScalarType();
uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
// Only for power-of-two scalar types, to be sure that the bitcast maps cleanly.
if (!isPowerOf2_64(ScalarTypeBitsize))
return SDValue();
unsigned NumInScalars = N->getNumOperands();
// Look through bitcasts
auto PeekThroughBitcast = [](SDValue Op) {
if (Op.getOpcode() == ISD::BITCAST)
return Op.getOperand(0);
return Op;
};
// The source value where all the parts are extracted.
SDValue Src;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = PeekThroughBitcast(N->getOperand(i));
// Ignore undef inputs.
if (In.isUndef()) continue;
if (In.getOpcode() != ISD::TRUNCATE)
return SDValue();
In = PeekThroughBitcast(In.getOperand(0));
if (In.getOpcode() != ISD::SRL) {
// For now only handle build_vec without shuffling; handling shifts here is
// left for the future.
if (i != 0)
return SDValue();
Src = In;
} else {
// In is SRL
SDValue part = PeekThroughBitcast(In.getOperand(0));
if (!Src) {
Src = part;
} else if (Src != part) {
// Vector parts do not stem from the same variable
return SDValue();
}
SDValue ShiftAmtVal = In.getOperand(1);
if (!isa<ConstantSDNode>(ShiftAmtVal))
return SDValue();
uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
// The value is not extracted at the expected bit position.
if (ShiftAmt != i * ScalarTypeBitsize)
return SDValue();
}
}
// Only cast if the size is the same
if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
return SDValue();
return DAG.getBitcast(VT, Src);
}
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx, bool DidSplitVec) {
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
EVT VT = N->getValueType(0);
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
// If we artificially split a vector in two already, then the offsets in the
// operands will all be based off of VecIn1, even those in VecIn2.
unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVT1Size = InVT1.getFixedSizeInBits();
uint64_t InVT2Size = InVT2.getFixedSizeInBits();
assert(InVT2Size <= InVT1Size &&
"Inputs must be sorted to be in non-increasing vector size order.");
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
// If the output vector length is a multiple of both input lengths,
// we can concatenate them and pad the rest with undefs.
unsigned NumConcats = VTSize / InVT1Size;
assert(NumConcats >= 2 && "Concat needs at least two inputs!");
SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
ConcatOps[0] = VecIn1;
ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
} else if (InVT1Size == VTSize * 2) {
if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
if (!VecIn2.getNode()) {
// If we only have one input vector, and it's twice the size of the
// output, split it in two.
VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
DAG.getVectorIdxConstant(NumElems, DL));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
} else {
assert(InVT2Size <= InVT1Size &&
"Second input is not going to be larger than the first one.");
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
// The shuffle type is different than VT, so check legality again.
if (LegalOperations &&
!TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
return SDValue();
// Legalizing INSERT_SUBVECTOR is tricky - you basically have to
// lower it back into a BUILD_VECTOR. So if the inserted type is
// illegal, don't even try.
if (InVT1 != InVT2) {
if (!TLI.isTypeLegal(InVT2))
return SDValue();
VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
}
ShuffleNumElems = NumElems * 2;
}
} else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
// TODO: Move this check upwards, so that if we have bad type
// mismatches, we don't create any DAG nodes.
return SDValue();
}
}
// Initialize mask to undef.
SmallVector<int, 8> Mask(ShuffleNumElems, -1);
// Only need to run up to the number of elements actually used, not the
// total number of elements in the shuffle - if we are shuffling a wider
// vector, the high lanes should be set to undef.
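// Elements taken from the first input keep their original extract index;
// elements taken from the second input are offset by Vec2Offset. For example,
// lane 1 of a second v4i32 input becomes mask value 4 + 1 = 5.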
for (unsigned i = 0; i != NumElems; ++i) {
if (VectorMask[i] <= 0)
continue;
unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
if (VectorMask[i] == (int)LeftIdx) {
Mask[i] = ExtIndex;
} else if (VectorMask[i] == (int)LeftIdx + 1) {
Mask[i] = Vec2Offset + ExtIndex;
}
}
// The types of the input vectors may have changed above.
InVT1 = VecIn1.getValueType();
// If we already have a VecIn2, it should have the same type as VecIn1.
// If we don't, get an undef/zero vector of the appropriate type.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
if (ShuffleNumElems > NumElems)
Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
return Shuffle;
}
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
// First, determine where the build vector is not undef.
// TODO: We could extend this to handle zero elements as well as undefs.
int NumBVOps = BV->getNumOperands();
int ZextElt = -1;
for (int i = 0; i != NumBVOps; ++i) {
SDValue Op = BV->getOperand(i);
if (Op.isUndef())
continue;
if (ZextElt == -1)
ZextElt = i;
else
return SDValue();
}
// Bail out if there's no non-undef element.
if (ZextElt == -1)
return SDValue();
// The build vector contains some number of undef elements and exactly
// one other element. That other element must be a zero-extended scalar
// extracted from a vector at a constant index to turn this into a shuffle.
// Also, require that the build vector does not implicitly truncate/extend
// its elements.
// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
EVT VT = BV->getValueType(0);
SDValue Zext = BV->getOperand(ZextElt);
if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
// The zero-extend must be a multiple of the source size, and we must be
// building a vector of the same size as the source of the extract element.
SDValue Extract = Zext.getOperand(0);
unsigned DestSize = Zext.getValueSizeInBits();
unsigned SrcSize = Extract.getValueSizeInBits();
if (DestSize % SrcSize != 0 ||
Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
return SDValue();
// Create a shuffle mask that will combine the extracted element with zeros
// and undefs.
int ZextRatio = DestSize / SrcSize;
int NumMaskElts = NumBVOps * ZextRatio;
SmallVector<int, 32> ShufMask(NumMaskElts, -1);
for (int i = 0; i != NumMaskElts; ++i) {
if (i / ZextRatio == ZextElt) {
// The low bits of the (potentially translated) extracted element map to
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
if (i % ZextRatio == 0)
ShufMask[i] = Extract.getConstantOperandVal(1);
else
ShufMask[i] = NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
// the shuffle mask with -1.
}
// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
// bitcast (shuffle V, ZeroVec, VectorMask)
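// For example, given (v2i64 build_vector undef, (zext (extractelt v4i32:V, 3))):
//   ZextRatio = 2, NumMaskElts = 4, ShufMask = <-1, -1, 3, 4>,
// so one half of the non-undef element is V[3] and the other half is taken
// from the zero vector.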
SDLoc DL(BV);
EVT VecVT = Extract.getOperand(0).getValueType();
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
ZeroVec, ShufMask, DAG);
if (!Shuf)
return SDValue();
return DAG.getBitcast(VT, Shuf);
}
// FIXME: promote to STLExtras.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
auto I = find(Range, Val);
if (I == Range.end())
return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
return std::distance(Range.begin(), I);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
return V;
// May only combine to shuffle after legalize if shuffle is legal.
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
bool UsesZeroVector = false;
unsigned NumElems = N->getNumOperands();
// Record, for each element of the newly built vector, which input vector
// that element comes from. -1 stands for undef, 0 for the zero vector,
// and positive values for the input vectors.
// VectorMask maps each element to its vector number, and VecIn maps vector
// numbers to their initial SDValues.
SmallVector<int, 8> VectorMask(NumElems, -1);
SmallVector<SDValue, 8> VecIn;
VecIn.push_back(SDValue());
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = N->getOperand(i);
if (Op.isUndef())
continue;
// See if we can use a blend with a zero vector.
// TODO: Should we generalize this to a blend with an arbitrary constant
// vector?
if (isNullConstant(Op) || isNullFPConstant(Op)) {
UsesZeroVector = true;
VectorMask[i] = 0;
continue;
}
// Not an undef or zero. If the input is something other than an
// EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
if (ExtractedFromVec.getValueType().isScalableVector())
return SDValue();
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
// All inputs must have the same element type as the output.
if (VT.getVectorElementType() !=
ExtractedFromVec.getValueType().getVectorElementType())
return SDValue();
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
if (Idx == -1) { // A new source vector?
Idx = VecIn.size();
VecIn.push_back(ExtractedFromVec);
}
VectorMask[i] = Idx;
}
// If we didn't find at least one input vector, bail out.
if (VecIn.size() < 2)
return SDValue();
// If all the operands of the BUILD_VECTOR extract from the same
// vector, then split that vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
SDValue Vec = VecIn.back();
EVT InVT = Vec.getValueType();
SmallVector<unsigned, 8> IndexVec(NumElems, 0);
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
continue;
unsigned Index = N->getOperand(i).getConstantOperandVal(1);
IndexVec[i] = Index;
MaxIndex = std::max(MaxIndex, Index);
}
NearestPow2 = PowerOf2Ceil(MaxIndex);
if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
NumElems * 2 < NearestPow2) {
unsigned SplitSize = NearestPow2 / 2;
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), SplitSize);
if (TLI.isTypeLegal(SplitVT) &&
SplitSize + SplitVT.getVectorNumElements() <=
InVT.getVectorNumElements()) {
SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
DAG.getVectorIdxConstant(SplitSize, DL));
SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
DAG.getVectorIdxConstant(0, DL));
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
continue;
VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
}
}
}
}
// Sort input vectors by decreasing vector element count,
// while preserving the relative order of equally-sized vectors.
// Note that we keep the first (implicit) zero vector as-is.
SmallVector<SDValue, 8> SortedVecIn(VecIn);
llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
[](const SDValue &a, const SDValue &b) {
return a.getValueType().getVectorNumElements() >
b.getValueType().getVectorNumElements();
});
// We now also need to rebuild the VectorMask, because it referenced element
// order in VecIn, and we just sorted them.
for (int &SourceVectorIndex : VectorMask) {
if (SourceVectorIndex <= 0)
continue;
unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
assert(Idx > 0 && Idx < SortedVecIn.size() &&
VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
SourceVectorIndex = Idx;
}
VecIn = std::move(SortedVecIn);
// TODO: Should this fire if some of the input vectors has illegal type (like
// it does now), or should we let legalization run its course first?
// Shuffle phase:
// Take pairs of vectors, and shuffle them so that the result has elements
// from these vectors in the correct places.
// For example, given:
// t10: i32 = extract_vector_elt t1, Constant:i64<0>
// t11: i32 = extract_vector_elt t2, Constant:i64<0>
// t12: i32 = extract_vector_elt t3, Constant:i64<0>
// t13: i32 = extract_vector_elt t1, Constant:i64<1>
// t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
// We will generate:
// t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
// t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
SmallVector<SDValue, 4> Shuffles;
for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
unsigned LeftIdx = 2 * In + 1;
SDValue VecLeft = VecIn[LeftIdx];
SDValue VecRight =
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
}
// If we need the zero vector as an "ingredient" in the blend tree, add it
// to the list of shuffles.
if (UsesZeroVector)
Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT));
// If we only have one shuffle, we're done.
if (Shuffles.size() == 1)
return Shuffles[0];
// Update the vector mask to point to the post-shuffle vectors.
for (int &Vec : VectorMask)
if (Vec == 0)
Vec = Shuffles.size() - 1;
else
Vec = (Vec - 1) / 2;
// More than one shuffle. Generate a binary tree of blends, e.g. if from
// the previous step we got the set of shuffles t10, t11, t12, t13, we will
// generate:
// t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
// t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
// t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
// t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
// t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
// t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
// t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
// Make sure the initial size of the shuffle list is even.
if (Shuffles.size() % 2)
Shuffles.push_back(DAG.getUNDEF(VT));
for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
if (CurSize % 2) {
Shuffles[CurSize] = DAG.getUNDEF(VT);
CurSize++;
}
for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
int Left = 2 * In;
int Right = 2 * In + 1;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
if (VectorMask[i] == Left) {
Mask[i] = i;
VectorMask[i] = In;
} else if (VectorMask[i] == Right) {
Mask[i] = i + NumElems;
VectorMask[i] = In;
}
}
Shuffles[In] =
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
}
return Shuffles[0];
}
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
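// For example:
//   (v4i32 build_vector (zext (extractelt v8i16:X, 4)),
//                       (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)),
//                       (zext (extractelt X, 7)))
// becomes (v4i32 zero_extend (v4i16 extract_subvector X, 4)).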
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
unsigned Opc = Op.getOpcode();
FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
return C->getZExtValue();
return -1;
};
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
int64_t Offset = checkElem(Op0);
if (Offset < 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
SDValue In = Op0.getOperand(0).getOperand(0);
EVT InSVT = In.getValueType().getScalarType();
EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
// Don't create an illegal input type after type legalization.
if (LegalTypes && !TLI.isTypeLegal(InVT))
return SDValue();
// Ensure all the elements come from the same vector and are adjacent.
for (unsigned i = 1; i != NumElems; ++i) {
if ((Offset + i) != checkElem(N->getOperand(i)))
return SDValue();
}
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
// A vector built entirely of undefs is undef.
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
// If this is a splat of a bitcast from another vector, change to a
// concat_vector.
// For example:
// (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
// (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
//
// If X is a build_vector itself, the concat can become a larger build_vector.
// TODO: Maybe this is useful for non-splat too?
if (!LegalOperations) {
if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
Splat = peekThroughBitcasts(Splat);
EVT SrcVT = Splat.getValueType();
if (SrcVT.isVector()) {
unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
SrcVT.getVectorElementType(), NumElts);
if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
NewVT, Ops);
return DAG.getBitcast(VT, Concat);
}
}
}
}
// Check if we can express the BUILD_VECTOR via a subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> uint64_t {
if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
(Op0.getOperand(0) == Op.getOperand(0)))
if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
return CNode->getZExtValue();
return -1;
};
int Offset = checkElem(Op0);
for (unsigned i = 0; i < N->getNumOperands(); ++i) {
if (Offset + i != checkElem(N->getOperand(i))) {
Offset = -1;
break;
}
}
if ((Offset == 0) &&
(Op0.getOperand(0).getValueType() == N->getValueType(0)))
return Op0.getOperand(0);
if ((Offset != -1) &&
((Offset % N->getValueType(0).getVectorNumElements()) ==
0)) // IDX must be multiple of output size.
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
Op0.getOperand(0), Op0.getOperand(1));
}
if (SDValue V = convertBuildVecZextToZext(N))
return V;
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
if (SDValue V = reduceBuildVecTruncToBitCast(N))
return V;
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
// A splat of a single element is a SPLAT_VECTOR if supported on the target.
// Do this late as some of the above may replace the splat.
if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
assert(!V.isUndef() && "Splat of undef should have been handled earlier");
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
}
return SDValue();
}
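// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into a BUILD_VECTOR of those scalars followed by a bitcast. For
// example, when v2i32 is not a legal type:
//   (v4i32 concat_vectors (v2i32 bitcast i64:a), (v2i32 bitcast i64:b))
//     --> (v4i32 bitcast (v2i64 build_vector a, b))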
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT OpVT = N->getOperand(0).getValueType();
// If the operands are legal vectors, leave them alone.
if (TLI.isTypeLegal(OpVT))
return SDValue();
SDLoc DL(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 8> Ops;
EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
// Keep track of what we encounter.
bool AnyInteger = false;
bool AnyFP = false;
for (const SDValue &Op : N->ops()) {
if (ISD::BITCAST == Op.getOpcode() &&
!Op.getOperand(0).getValueType().isVector())
Ops.push_back(Op.getOperand(0));
else if (ISD::UNDEF == Op.getOpcode())
Ops.push_back(ScalarUndef);
else
return SDValue();
// Note whether we encounter an integer or floating point scalar.
// If it's neither, bail out, it could be something weird like x86mmx.
EVT LastOpVT = Ops.back().getValueType();
if (LastOpVT.isFloatingPoint())
AnyFP = true;
else if (LastOpVT.isInteger())
AnyInteger = true;
else
return SDValue();
}
// If any of the operands is a floating point scalar bitcast to a vector,
// use floating point types throughout, and bitcast everything.
// Replace UNDEFs by another scalar UNDEF node, of the final desired type.
if (AnyFP) {
SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
if (AnyInteger) {
for (SDValue &Op : Ops) {
if (Op.getValueType() == SVT)
continue;
if (Op.isUndef())
Op = ScalarUndef;
else
Op = DAG.getBitcast(SVT, Op);
}
}
}
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
VT.getSizeInBits() / SVT.getSizeInBits());
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
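// For example:
//   (v4i32 concat_vectors (v2i32 extract_subvector v4i32:A, 2),
//                         (v2i32 extract_subvector v4i32:B, 0))
//     --> (v4i32 vector_shuffle<2,3,4,5> A, B)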
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();
// We currently can't generate an appropriate shuffle for a scalable vector.
if (VT.isScalableVector())
return SDValue();
int NumElts = VT.getVectorNumElements();
int NumOpElts = OpVT.getVectorNumElements();
SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
SmallVector<int, 8> Mask;
for (SDValue Op : N->ops()) {
Op = peekThroughBitcasts(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
// What vector are we extracting the subvector from and at what index?
SDValue ExtVec = Op.getOperand(0);
int ExtIdx = Op.getConstantOperandVal(1);
// We want the EVT of the original extraction to correctly scale the
// extraction index.
EVT ExtVT = ExtVec.getValueType();
ExtVec = peekThroughBitcasts(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
if (ExtVT.getSizeInBits() != VT.getSizeInBits())
return SDValue();
// Scale the subvector index to account for any bitcast.
int NumExtElts = ExtVT.getVectorNumElements();
if (0 == (NumExtElts % NumElts))
ExtIdx /= (NumExtElts / NumElts);
else if (0 == (NumElts % NumExtElts))
ExtIdx *= (NumElts / NumExtElts);
else
return SDValue();
// At most we can reference 2 inputs in the final shuffle.
if (SV0.isUndef() || SV0 == ExtVec) {
SV0 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx);
} else if (SV1.isUndef() || SV1 == ExtVec) {
SV1 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx + NumElts);
} else {
return SDValue();
}
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
DAG.getBitcast(VT, SV1), Mask, DAG);
}
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
unsigned CastOpcode = N->getOperand(0).getOpcode();
switch (CastOpcode) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// TODO: Allow more opcodes?
// case ISD::BITCAST:
// case ISD::TRUNCATE:
// case ISD::ZERO_EXTEND:
// case ISD::SIGN_EXTEND:
// case ISD::FP_EXTEND:
break;
default:
return SDValue();
}
EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
if (!SrcVT.isVector())
return SDValue();
// All operands of the concat must be the same kind of cast from the same
// source type.
SmallVector<SDValue, 4> SrcOps;
for (SDValue Op : N->ops()) {
if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
Op.getOperand(0).getValueType() != SrcVT)
return SDValue();
SrcOps.push_back(Op.getOperand(0));
}
// The wider cast must be supported by the target. This is unusual because
// the operation support type parameter depends on the opcode. In addition,
// check the other type in the cast to make sure this is really legal.
EVT VT = N->getValueType(0);
EVT SrcEltVT = SrcVT.getVectorElementType();
ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (CastOpcode) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
!TLI.isTypeLegal(VT))
return SDValue();
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
!TLI.isTypeLegal(ConcatSrcVT))
return SDValue();
break;
default:
llvm_unreachable("Unexpected cast opcode");
}
// concat (cast X), (cast Y)... -> cast (concat X, Y...)
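// For example:
//   (v4f32 concat (v2f32 sint_to_fp v2i32:X), (v2f32 sint_to_fp v2i32:Y))
//     --> (v4f32 sint_to_fp (v4i32 concat X, Y))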
SDLoc DL(N);
SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
return N->getOperand(0);
// Check if all of the operands are undefs.
EVT VT = N->getValueType(0);
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
// Optimize concat_vectors where all but the first of the vectors are undef.
if (all_of(drop_begin(N->ops()),
[](const SDValue &Op) { return Op.isUndef(); })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
// If the input is a concat_vectors, just make a larger concat by padding
// with smaller undefs.
if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
unsigned NumOps = N->getNumOperands() * In.getNumOperands();
SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
// scalar_to_vector(scalar)
if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
Scalar.hasOneUse()) {
EVT SVT = Scalar.getValueType().getVectorElementType();
if (SVT == Scalar.getOperand(0).getValueType())
Scalar = Scalar.getOperand(0);
}
// concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
if (!Scalar.getValueType().isVector()) {
// If the bitcast type isn't legal, it might be a trunc of a legal type;
// look through the trunc so we can still do the transform:
// concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
if (Scalar->getOpcode() == ISD::TRUNCATE &&
!TLI.isTypeLegal(Scalar.getValueType()) &&
TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
Scalar = Scalar->getOperand(0);
EVT SclTy = Scalar.getValueType();
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
// Bail out if the vector size is not a multiple of the scalar size.
if (VT.getSizeInBits() % SclTy.getSizeInBits())
return SDValue();
unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
if (VNTNumElms < 2)
return SDValue();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
return DAG.getBitcast(VT, Res);
}
}
// Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
// We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
auto IsBuildVectorOrUndef = [](const SDValue &Op) {
return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
};
if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
SmallVector<SDValue, 8> Opnds;
EVT SVT = VT.getScalarType();
EVT MinVT = SVT;
if (!SVT.isFloatingPoint()) {
// If the BUILD_VECTORs are built from integers, they may have different
// operand types. Get the smallest type and truncate all operands to it.
bool FoundMinVT = false;
for (const SDValue &Op : N->ops())
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
EVT OpSVT = Op.getOperand(0).getValueType();
MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
FoundMinVT = true;
}
assert(FoundMinVT && "Concat vector type mismatch");
}
for (const SDValue &Op : N->ops()) {
EVT OpVT = Op.getValueType();
unsigned NumElts = OpVT.getVectorNumElements();
if (ISD::UNDEF == Op.getOpcode())
Opnds.append(NumElts, DAG.getUNDEF(MinVT));
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
if (SVT.isFloatingPoint()) {
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
} else {
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(
DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
}
}
}
assert(VT.getVectorNumElements() == Opnds.size() &&
"Concat vector type mismatch");
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
// Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
// operands and look for CONCAT operations that place the incoming vectors
// at the exact same location.
//
// For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
SDValue SingleSource = SDValue();
unsigned PartNumElem =
N->getOperand(0).getValueType().getVectorMinNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
if (Op.isUndef())
continue;
// Check if this is the identity extract:
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return SDValue();
// Find the single incoming vector for the extract_subvector.
if (SingleSource.getNode()) {
if (Op.getOperand(0) != SingleSource)
return SDValue();
} else {
SingleSource = Op.getOperand(0);
// Check the source type is the same as the type of the result.
// If not, this concat may extend the vector, so we cannot
// optimize it away.
if (SingleSource.getValueType() != N->getValueType(0))
return SDValue();
}
// Check that we are reading from the identity index.
unsigned IdentityIndex = i * PartNumElem;
if (Op.getConstantOperandAPInt(1) != IdentityIndex)
return SDValue();
}
if (SingleSource.getNode())
return SingleSource;
return SDValue();
}
// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
// if the subvector can be sourced for free.
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
return V.getOperand(1);
}
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
V.getOperand(0).getValueType() == SubVT &&
(IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
return V.getOperand(SubIdx);
}
return SDValue();
}
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SelectionDAG &DAG,
bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
EVT VecVT = BinOp.getValueType();
SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
return SDValue();
SDValue Index = Extract->getOperand(1);
EVT SubVT = Extract->getValueType(0);
if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
return SDValue();
SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
// TODO: We could handle the case where only 1 operand is being inserted by
// creating an extract of the other operand, but that requires checking
// number of uses and/or costs.
if (!Sub0 || !Sub1)
return SDValue();
// We are inserting both operands of the wide binop only to extract back
// to the narrow vector size. Eliminate all of the insert/extract:
// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
BinOp->getFlags());
}
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
bool LegalOperations) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
return V;
// The extract index must be a constant, so we can map it to a concat operand.
auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!ExtractIndexC)
return SDValue();
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
unsigned BOpcode = BinOp.getOpcode();
if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
// Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
// reduced to the unary fneg when it is visited, and we probably want to deal
// with fneg in a target-specific way.
if (BOpcode == ISD::FSUB) {
auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
if (C && C->getValueAPF().isNegZero())
return SDValue();
}
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
// The optimisations below currently assume we are dealing with fixed length
// vectors. It is possible to add support for scalable vectors, but at the
// moment we've done no analysis to prove whether they are profitable or not.
if (!WideBVT.isFixedLengthVector())
return SDValue();
EVT VT = Extract->getValueType(0);
unsigned ExtractIndex = ExtractIndexC->getZExtValue();
assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
"Extract index is not a multiple of the vector length.");
// Bail out if this is not a proper multiple width extraction.
unsigned WideWidth = WideBVT.getSizeInBits();
unsigned NarrowWidth = VT.getSizeInBits();
if (WideWidth % NarrowWidth != 0)
return SDValue();
// Bail out if we are extracting a fraction of a single operation. This can
// occur because we potentially looked through a bitcast of the binop.
unsigned NarrowingRatio = WideWidth / NarrowWidth;
unsigned WideNumElts = WideBVT.getVectorNumElements();
if (WideNumElts % NarrowingRatio != 0)
return SDValue();
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
// for concat ops. The narrow binop alone makes this transform profitable.
// We can't just reuse the original extract index operand because we may have
// bitcasted.
unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
// extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
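// Illustrative example (hypothetical types, no intervening bitcast assumed):
// (v4i32 extract (v8i32 add B0, B1), 4)
//   --> v4i32 add (extract B0, 4), (extract B1, 4)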
SDLoc DL(Extract);
SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), NewExtIndex);
SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
BinOp.getNode()->getFlags());
return DAG.getBitcast(VT, NarrowBinOp);
}
// Only handle the case where we are doubling and then halving. A larger ratio
// may require more than two narrow binops to replace the wide binop.
if (NarrowingRatio != 2)
return SDValue();
// TODO: The motivating case for this transform is an x86 AVX1 target. That
// target has temptingly almost legal versions of bitwise logic ops in 256-bit
// flavors, but no other 256-bit integer support. This could be extended to
// handle any binop, but that may require fixing/adding other folds to avoid
// codegen regressions.
if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
return SDValue();
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
return V.getOperand(ConcatOpNum);
return SDValue();
};
SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
if (SubVecL || SubVecR) {
// If a binop operand was not the result of a concat, we must extract a
// half-sized operand for our new narrow binop:
// extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
// extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
// extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
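// Illustrative example (hypothetical v8i32/v4i32 types, extracting the upper
// half so that ExtBOIdx == 4):
// extract (and (concat X1, X2), Y), 4 --> and X2, (extract Y, 4)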
SDLoc DL(Extract);
SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), IndexC);
SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), IndexC);
SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
return DAG.getBitcast(VT, NarrowBinOp);
}
return SDValue();
}
/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
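/// Illustrative example (hypothetical types, little-endian, added for
/// exposition):
///   (v4i32 extract_subvector (v8i32 load [p]), 4) --> (v4i32 load [p + 16])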
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Add support for big-endian. The offset calculation must be adjusted.
if (DAG.getDataLayout().isBigEndian())
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
!ExtIdx)
return SDValue();
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
// We can only create byte sized loads.
if (!VT.isByteSized())
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
// multiple of the minimum number of elements in the result type.
assert(Index % NumElts == 0 && "The extract subvector index is not a "
"multiple of the result's element count");
// It's fine to use TypeSize here as we know the offset will not be negative.
TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO;
if (Offset.isScalable()) {
MachinePointerInfo MPI =
MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
} else
MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
StoreSize);
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
}
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
uint64_t ExtIdx = N->getConstantOperandVal(1);
// Extract from UNDEF is UNDEF.
if (V.isUndef())
return DAG.getUNDEF(NVT);
if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
return NarrowLoad;
// Combine an extract of an extract into a single extract_subvector.
// ext (ext X, C), 0 --> ext X, C
if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
V.getConstantOperandVal(1)) &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
V.getOperand(1));
}
}
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
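// Illustrative example (hypothetical types, added for exposition):
// (v4i32 extract_subv (v8i32 bitcast (v4i64 X)), 4)
//   --> (v4i32 bitcast (v2i64 extract_subv X, 2))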
if (V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).getValueType().isVector() &&
(!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
SDValue SrcOp = V.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
if ((SrcNumElts % DestNumElts) == 0) {
unsigned SrcDestRatio = SrcNumElts / DestNumElts;
ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
SDLoc DL(N);
SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
ElementCount NewExtEC =
NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
unsigned IndexValScaled = ExtIdx / DestSrcRatio;
EVT NewExtVT =
EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
if (NewExtEC.isScalar() &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
}
}
}
}
if (V.getOpcode() == ISD::CONCAT_VECTORS) {
unsigned ExtNumElts = NVT.getVectorMinNumElements();
EVT ConcatSrcVT = V.getOperand(0).getValueType();
assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
"Concat and extract subvector do not change element type");
assert((ExtIdx % ExtNumElts) == 0 &&
"Extract index is not a multiple of the input vector length.");
unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
if (ConcatSrcNumElts == ExtNumElts)
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple of the length of this
// extract, then extract a fraction of one of those source vectors directly
// from a concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
"Trying to extract from >1 concat operand?");
assert(NewExtIdx % ExtNumElts == 0 &&
"Extract index is not a multiple of the input vector length.");
SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
V.getOperand(ConcatOpIdx), NewIndexC);
}
}
V = peekThroughBitcasts(V);
// If the input is a build vector, try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
// Only do this if we won't split any elements.
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
EVT EltVT = InVT.getVectorElementType();
EVT ExtractVT =
NumElems == 1 ? EltVT
: EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
if ((Level < AfterLegalizeDAG ||
(NumElems == 1 ||
TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
}
if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only the simple case where the vector being inserted and the vector
// being extracted are of the same size.
EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
ExtIdx * NVT.getScalarSizeInBits()) {
if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
return SDValue();
return DAG.getBitcast(NVT, V.getOperand(1));
}
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
N->getOperand(1));
}
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
/// followed by concatenation. Narrow vector ops may have better performance
/// than wide ops, and this can unlock further narrowing of other vector ops.
/// Targets can invert this transform later if it is not profitable.
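/// Illustrative example (v8i32 with v4i32 halves; mask values chosen for
/// exposition):
///   shuffle (concat X, undef), (concat Y, undef), <0,9,2,11,1,8,3,10>
///     --> concat (shuffle X, Y, <0,5,2,7>), (shuffle X, Y, <1,4,3,6>)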
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
!N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
return SDValue();
// Split the wide shuffle mask into halves. Any mask element that is accessing
// operand 1 is offset down to account for narrowing of the vectors.
ArrayRef<int> Mask = Shuf->getMask();
EVT VT = Shuf->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned HalfNumElts = NumElts / 2;
SmallVector<int, 16> Mask0(HalfNumElts, -1);
SmallVector<int, 16> Mask1(HalfNumElts, -1);
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] == -1)
continue;
// If we reference the upper (undef) subvector then the element is undef.
if ((Mask[i] % NumElts) >= HalfNumElts)
continue;
int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
if (i < HalfNumElts)
Mask0[i] = M;
else
Mask1[i - HalfNumElts] = M;
}
// Ask the target if this is a valid transform.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
HalfNumElts);
if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
!TLI.isShuffleMaskLegal(Mask1, HalfVT))
return SDValue();
// shuffle (concat X, undef), (concat Y, undef), Mask -->
// concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
SDLoc DL(Shuf);
SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle followed by
// a concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
ArrayRef<int> Mask = SVN->getMask();
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
auto IsUndefMaskElt = [](int i) { return i == -1; };
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
IsUndefMaskElt)) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
N0.getOperand(1),
Mask.slice(0, NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
// Look at each subvector-sized piece of the shuffle. We're looking for exact
// subvector-sized copies from one of the concatenated source vectors.
for (unsigned I = 0; I != NumConcats; ++I) {
unsigned Begin = I * NumElemsPerConcat;
ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
// Make sure we're dealing with a copy.
if (llvm::all_of(SubMask, IsUndefMaskElt)) {
Ops.push_back(DAG.getUNDEF(ConcatVT));
continue;
}
int OpIdx = -1;
for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
if (IsUndefMaskElt(SubMask[i]))
continue;
if ((SubMask[i] % (int)NumElemsPerConcat) != i)
return SDValue();
int EltOpIdx = SubMask[i] / NumElemsPerConcat;
if (0 <= OpIdx && EltOpIdx != OpIdx)
return SDValue();
OpIdx = EltOpIdx;
}
assert(0 <= OpIdx && "Unknown concat_vectors op");
if (OpIdx < (int)N0.getNumOperands())
Ops.push_back(N0.getOperand(OpIdx));
else
Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
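//
// Illustrative example (values chosen for exposition; assumes the one-use and
// duplicate-operand restrictions above are satisfied):
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,5,2,7>
//     --> build_vector a,f,c,h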
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT = SVN->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = SVN->getOperand(0);
SDValue N1 = SVN->getOperand(1);
if (!N0->hasOneUse())
return SDValue();
// If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
if (!N1->hasOneUse())
return SDValue();
bool N0AnyConst = isAnyConstantBuildVector(N0);
bool N1AnyConst = isAnyConstantBuildVector(N1);
if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
return SDValue();
if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
return SDValue();
}
// If both inputs are splats of the same value then we can safely merge this
// to a single BUILD_VECTOR with undef elements based on the shuffle mask.
bool IsSplat = false;
auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
if (BV0 && BV1)
if (SDValue Splat0 = BV0->getSplatValue())
IsSplat = (Splat0 == BV1->getSplatValue());
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
int Idx = M < (int)NumElts ? M : M - NumElts;
SDValue &S = (M < (int)NumElts ? N0 : N1);
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
SDValue Op0 = S.getOperand(0);
Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
} else {
// Operand can't be combined - bail out.
return SDValue();
}
}
// Don't duplicate a non-constant BUILD_VECTOR operand unless we're
// generating a splat; semantically, this is fine, but it's likely to
// generate low-quality code if the target can't reconstruct an appropriate
// shuffle.
if (!Op.isUndef() && !isIntOrFPConstant(Op))
if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);
}
// BUILD_VECTOR requires all inputs to be of the same type; find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
if (SVT.isInteger())
for (SDValue &Op : Ops)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
bool LegalOperations) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
// TODO Add support for big-endian when we have a test case.
if (!VT.isInteger() || IsBigEndian)
return SDValue();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
ArrayRef<int> Mask = SVN->getMask();
SDValue N0 = SVN->getOperand(0);
// shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
continue;
return false;
}
return true;
};
// Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
// power-of-2 extensions as they are the most likely.
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
// Check for non power of 2 vector sizes
if (NumElts % Scale != 0)
continue;
if (!isAnyExtend(Scale))
continue;
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
// Never create an illegal type. Only create unsupported operations if we
// are pre-legalization.
if (TLI.isTypeLegal(OutVT))
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
return DAG.getBitcast(VT,
DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
SDLoc(SVN), OutVT, N0));
}
return SDValue();
}
// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
// TODO Add support for big-endian when we have a test case.
if (!VT.isInteger() || IsBigEndian)
return SDValue();
SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
return SDValue();
SDValue N00 = N0.getOperand(0);
ArrayRef<int> Mask = SVN->getMask();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
return SDValue();
unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
// (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
// (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
// (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
continue;
return false;
}
return true;
};
// At the moment we just handle the case where we've truncated back to the
// same size as before the extension.
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
if (isTruncate(ExtScale))
return DAG.getBitcast(VT, N00);
return SDValue();
}
// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undefs in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
if (!Shuf->getOperand(1).isUndef())
return SDValue();
auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Splat || !Splat->isSplat())
return SDValue();
ArrayRef<int> ShufMask = Shuf->getMask();
ArrayRef<int> SplatMask = Splat->getMask();
assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
// Prefer simplifying to the splat-shuffle, if possible. This is legal if
// every undef mask element in the splat-shuffle has a corresponding undef
// element in the user-shuffle's mask or if the composition of mask elements
// would result in undef.
// Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
// * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
// In this case it is not legal to simplify to the splat-shuffle because we
// may be exposing the users of the shuffle to an undef element at index 1
// which was not there before the combine.
// * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
// In this case the composition of masks yields SplatMask, so it's ok to
// simplify to the splat-shuffle.
// * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
// In this case the composed mask includes all undef elements of SplatMask
// and in addition sets element zero to undef. It is safe to simplify to
// the splat-shuffle.
auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
ArrayRef<int> SplatMask) {
for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
if (UserMask[i] != -1 && SplatMask[i] == -1 &&
SplatMask[UserMask[i]] != -1)
return false;
return true;
};
if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
return Shuf->getOperand(0);
// Create a new shuffle with a mask that is composed of the two shuffles'
// masks.
SmallVector<int, 32> NewMask;
for (int Idx : ShufMask)
NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
Splat->getOperand(0), Splat->getOperand(1),
NewMask);
}
/// Combine shuffle of shuffle of the form:
/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
SelectionDAG &DAG) {
if (!OuterShuf->getOperand(1).isUndef())
return SDValue();
auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
return SDValue();
ArrayRef<int> OuterMask = OuterShuf->getMask();
ArrayRef<int> InnerMask = InnerShuf->getMask();
unsigned NumElts = OuterMask.size();
assert(NumElts == InnerMask.size() && "Mask length mismatch");
SmallVector<int, 32> CombinedMask(NumElts, -1);
int SplatIndex = -1;
for (unsigned i = 0; i != NumElts; ++i) {
// Undef lanes remain undef.
int OuterMaskElt = OuterMask[i];
if (OuterMaskElt == -1)
continue;
// Peek through the shuffle masks to get the underlying source element.
int InnerMaskElt = InnerMask[OuterMaskElt];
if (InnerMaskElt == -1)
continue;
// Initialize the splatted element.
if (SplatIndex == -1)
SplatIndex = InnerMaskElt;
// Non-matching index - this is not a splat.
if (SplatIndex != InnerMaskElt)
return SDValue();
CombinedMask[i] = InnerMaskElt;
}
assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
getSplatIndex(CombinedMask) != -1) &&
"Expected a splat mask");
// TODO: The transform may be a win even if the mask is not legal.
EVT VT = OuterShuf->getValueType(0);
assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
return SDValue();
return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
InnerShuf->getOperand(1), CombinedMask);
}
/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
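/// Illustrative examples (4-element masks, added for exposition):
///   <4,5,2,7> --> 2  (only lane 2 takes an element from operand 0)
///   <0,5,2,7> --> -1 (two lanes take elements from operand 0)
///   <4,5,6,7> --> -1 (no lane takes an element from operand 0)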
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
int MaskSize = Mask.size();
int EltFromOp0 = -1;
// TODO: This does not match if there are undef elements in the shuffle mask.
// Should we ignore undefs in the shuffle mask instead? The trade-off is
// removing an instruction (a shuffle), but losing the knowledge that some
// vector lanes are not needed.
for (int i = 0; i != MaskSize; ++i) {
if (Mask[i] >= 0 && Mask[i] < MaskSize) {
// We're looking for a shuffle of exactly one element from operand 0.
if (EltFromOp0 != -1)
return -1;
EltFromOp0 = i;
} else if (Mask[i] != i + MaskSize) {
// Nothing from operand 1 can change lanes.
return -1;
}
}
return EltFromOp0;
}
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
SDValue Op0 = Shuf->getOperand(0);
SDValue Op1 = Shuf->getOperand(1);
int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
if (ShufOp0Index == -1) {
// Commute mask and check again.
ShuffleVectorSDNode::commuteMask(CommutedMask);
ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
if (ShufOp0Index == -1)
return SDValue();
// Commute operands to match the commuted shuffle mask.
std::swap(Op0, Op1);
Mask = CommutedMask;
}
// The shuffle inserts exactly one element from operand 0 into operand 1.
// Now see if we can access that element as a scalar via a real insert element
// instruction.
// TODO: We can try harder to locate the element as a scalar. Examples: it
// could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
"Shuffle mask value must be from operand 0");
if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
return SDValue();
// There's an existing insertelement with constant insertion index, so we
// don't need to check the legality/profitability of a replacement operation
// that differs at most in the constant value. The target should be able to
// lower any of those in a similar way. If not, legalization will expand this
// to a scalar-to-vector plus shuffle.
//
// Note that the shuffle may move the scalar from the position that the insert
// element used. Therefore, our new insert element occurs at the shuffle's
// mask index value, not the insert's index value.
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
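// Illustrative example (v4i32, values chosen for exposition):
// shuffle (insertelt V1, x, 0), V2, <4,5,0,7> --> insertelt V2, x, 2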
SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
Op1, Op0.getOperand(1), NewInsIndex);
}
/// If we have a unary shuffle of a shuffle, see if it can be folded away
/// completely. This has the potential to lose undef knowledge because the first
/// shuffle may not have an undef mask element where the second one does. So
/// only call this after doing simplifications based on demanded elements.
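/// Illustrative example (values chosen for exposition):
///   shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,1,3,3> --> shuf0 X, Y, <0,0,2,2>
/// because every lane of the outer shuffle selects the same underlying
/// element as the corresponding lane of the inner shuffle.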
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
// shuf (shuf0 X, Y, Mask0), undef, Mask
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Shuf0 || !Shuf->getOperand(1).isUndef())
return SDValue();
ArrayRef<int> Mask = Shuf->getMask();
ArrayRef<int> Mask0 = Shuf0->getMask();
for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
// Ignore undef elements.
if (Mask[i] == -1)
continue;
assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
// Is the element of the shuffle operand chosen by this shuffle the same as
// the element chosen by the shuffle operand itself?
if (Mask0[Mask[i]] != Mask0[i])
return SDValue();
}
// Every element of this shuffle is identical to the result of the previous
// shuffle, so we can replace this value.
return Shuf->getOperand(0);
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
// Canonicalize shuffle undef, undef -> undef
if (N0.isUndef() && N1.isUndef())
return DAG.getUNDEF(VT);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
if (N0 == N1) {
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) Idx -= NumElts;
NewMask.push_back(Idx);
}
return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
return DAG.getCommutedVectorShuffle(*SVN);
// Remove references to rhs if it is undef
if (N1.isUndef()) {
bool Changed = false;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= (int)NumElts) {
Idx = -1;
Changed = true;
}
NewMask.push_back(Idx);
}
if (Changed)
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
// A shuffle of a single vector that is a splatted value can always be folded.
if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
return V;
if (SDValue V = formSplatFromShuffles(SVN, DAG))
return V;
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
SDLoc DL(N);
EVT EltVT = VT.getScalarType();
SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
N0.getNode()->getFlags());
SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
}
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look through conversions that change things like v4f32 to v2f64.
SDNode *V = N0.getNode();
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
V = ConvInput.getNode();
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
assert(V->getNumOperands() == NumElts &&
"BUILD_VECTOR has wrong number of operands");
SDValue Base;
bool AllSame = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (!V->getOperand(i).isUndef()) {
Base = V->getOperand(i);
break;
}
}
// Splat of <u, u, u, u>, return <u, u, u, u>
if (!Base.getNode())
return N0;
for (unsigned i = 0; i != NumElts; ++i) {
if (V->getOperand(i) != Base) {
AllSame = false;
break;
}
}
// Splat of <x, x, x, x>, return <x, x, x, x>
if (AllSame)
return N0;
// Canonicalize any other splat as a build_vector.
SDValue Splatted = V->getOperand(SplatIndex);
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (V->getValueType(0) != VT)
NewBV = DAG.getBitcast(VT, NewBV);
return NewBV;
}
}
// Simplify source operands based on shuffle mask.
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
// This is intentionally placed after demanded elements simplification because
// it could eliminate knowledge of undef elements created by this shuffle.
if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
return ShufOp;
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
if (SDValue V = combineTruncationShuffle(SVN, DAG))
return V;
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
(N1.getOpcode() == ISD::CONCAT_VECTORS &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
if (SDValue V = partitionShuffleOfConcats(N, DAG))
return V;
}
// A shuffle of a concat of the same narrow vector can be reduced to use
// only low-half elements of a concat with undef:
// shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
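// Illustrative example (v8i32 with v4i32 halves; mask values chosen for
// exposition):
//   shuf (concat X, X), undef, <5,6,0,1,7,4,2,3>
//     --> shuf (concat X, undef), undef, <1,2,0,1,3,0,2,3>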
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
N0.getNumOperands() == 2 &&
N0.getOperand(0) == N0.getOperand(1)) {
int HalfNumElts = (int)NumElts / 2;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx >= HalfNumElts) {
assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
Idx -= HalfNumElts;
}
NewMask.push_back(Idx);
}
if (TLI.isShuffleMaskLegal(NewMask, VT)) {
SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
N0.getOperand(0), UndefVec);
return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
}
}
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
EVT InnerVT = BC0->getValueType(0);
EVT InnerSVT = InnerVT.getScalarType();
// Determine which shuffle works with the smaller scalar type.
EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
EVT ScaleSVT = ScaleVT.getScalarType();
if (TLI.isTypeLegal(ScaleVT) &&
0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
// Scale the shuffle masks to the smaller scalar type.
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
SmallVector<int, 8> InnerMask;
SmallVector<int, 8> OuterMask;
narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
for (int M : OuterMask)
NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
// Test for shuffle mask legality over both commutations.
SDValue SV0 = BC0->getOperand(0);
SDValue SV1 = BC0->getOperand(1);
bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
if (!LegalMask) {
std::swap(SV0, SV1);
ShuffleVectorSDNode::commuteMask(NewMask);
LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
}
if (LegalMask) {
SV0 = DAG.getBitcast(ScaleVT, SV0);
SV1 = DAG.getBitcast(ScaleVT, SV1);
return DAG.getBitcast(
VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
}
}
}
}
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
// i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
// Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
auto MergeInnerShuffle =
[NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
ShuffleVectorSDNode *OtherSVN, SDValue N1,
const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
if (OtherSVN->isSplat())
return false;
SV0 = SV1 = SDValue();
Mask.clear();
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
if (Commute)
Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
SDValue CurrentVec;
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
// shuffle mask to identify which vector is actually referenced.
Idx = OtherSVN->getMaskElt(Idx);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
: OtherSVN->getOperand(1);
} else {
// This shuffle index references an element within N1.
CurrentVec = N1;
}
// Simple case where 'CurrentVec' is UNDEF.
if (CurrentVec.isUndef()) {
Mask.push_back(-1);
continue;
}
// Canonicalize the shuffle index. We don't know yet if CurrentVec
// will be the first or second operand of the combined shuffle.
Idx = Idx % NumElts;
if (!SV0.getNode() || SV0 == CurrentVec) {
// Ok. CurrentVec is the left hand side.
// Update the mask accordingly.
SV0 = CurrentVec;
Mask.push_back(Idx);
continue;
}
if (!SV1.getNode() || SV1 == CurrentVec) {
// Ok. CurrentVec is the right hand side.
// Update the mask accordingly.
SV1 = CurrentVec;
Mask.push_back(Idx + NumElts);
continue;
}
// Last chance - see if the vector is another shuffle and if it
// uses one of the existing candidate shuffle ops.
if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
int InnerIdx = CurrentSVN->getMaskElt(Idx);
if (InnerIdx < 0) {
Mask.push_back(-1);
continue;
}
SDValue InnerVec = (InnerIdx < (int)NumElts)
? CurrentSVN->getOperand(0)
: CurrentSVN->getOperand(1);
if (InnerVec.isUndef()) {
Mask.push_back(-1);
continue;
}
InnerIdx %= NumElts;
if (InnerVec == SV0) {
Mask.push_back(InnerIdx);
continue;
}
if (InnerVec == SV1) {
Mask.push_back(InnerIdx + NumElts);
continue;
}
}
// Bail out if we cannot convert the shuffle pair into a single shuffle.
return false;
}
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
return true;
// Avoid introducing shuffles with illegal mask.
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
if (TLI.isShuffleMaskLegal(Mask, VT))
return true;
std::swap(SV0, SV1);
ShuffleVectorSDNode::commuteMask(Mask);
return TLI.isShuffleMaskLegal(Mask, VT);
};
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
// shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(N1->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
SDValue SV0 = N1->getOperand(0);
SDValue SV1 = N1->getOperand(1);
bool HasSameOp0 = N0 == SV0;
bool IsSV1Undef = SV1.isUndef();
if (HasSameOp0 || IsSV1Undef || N0 == SV1)
// Commute the operands of this shuffle so merging below will trigger.
return DAG.getCommutedVectorShuffle(*SVN);
}
// Canonicalize splat shuffles to the RHS to improve merging below.
// shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
cast<ShuffleVectorSDNode>(N0)->isSplat() &&
!cast<ShuffleVectorSDNode>(N1)->isSplat()) {
return DAG.getCommutedVectorShuffle(*SVN);
}
// Try to fold according to rules:
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
// Only fold if this shuffle is the only user of the other shuffle.
// Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
for (int i = 0; i != 2; ++i) {
if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
N->isOnlyUserOf(N->getOperand(i).getNode())) {
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
SDValue SV0, SV1;
SmallVector<int, 4> Mask;
if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
SV0, SV1, Mask)) {
// Check if all indices in Mask are Undef. If so, propagate Undef.
if (llvm::all_of(Mask, [](int M) { return M < 0; }))
return DAG.getUNDEF(VT);
return DAG.getVectorShuffle(VT, SDLoc(N),
SV0 ? SV0 : DAG.getUNDEF(VT),
SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
}
}
}
// Merge shuffles through binops if we are able to merge the shuffle with at
// least one other shuffle.
// shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
// shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
unsigned SrcOpcode = N0.getOpcode();
if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
(N1.isUndef() ||
(SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
// Get binop source ops, or just pass on the undef.
SDValue Op00 = N0.getOperand(0);
SDValue Op01 = N0.getOperand(1);
SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
// TODO: We might be able to relax the VT check but we don't currently
// have any isBinOp() that has different result/ops VTs so play safe until
// we have test coverage.
if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
Op01.getValueType() == VT && Op11.getValueType() == VT &&
(Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
SmallVectorImpl<int> &Mask, bool LeftOp,
bool Commute) {
SDValue InnerN = Commute ? N1 : N0;
SDValue Op0 = LeftOp ? Op00 : Op01;
SDValue Op1 = LeftOp ? Op10 : Op11;
if (Commute)
std::swap(Op0, Op1);
// Only accept the merged shuffle if we don't introduce undef elements,
// or the inner shuffle already contained undef elements.
auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
Mask) &&
(llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
llvm::none_of(Mask, [](int M) { return M < 0; }));
};
// Ensure we don't increase the number of shuffles - we must merge a
// shuffle from at least one of the LHS and RHS ops.
bool MergedLeft = false;
SDValue LeftSV0, LeftSV1;
SmallVector<int, 4> LeftMask;
if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
MergedLeft = true;
} else {
LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
LeftSV0 = Op00, LeftSV1 = Op10;
}
bool MergedRight = false;
SDValue RightSV0, RightSV1;
SmallVector<int, 4> RightMask;
if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
MergedRight = true;
} else {
RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
RightSV0 = Op01, RightSV1 = Op11;
}
if (MergedLeft || MergedRight) {
SDLoc DL(N);
SDValue LHS = DAG.getVectorShuffle(
VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
SDValue RHS = DAG.getVectorShuffle(
VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
}
}
}
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
return V;
return SDValue();
}
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SDValue InVal = N->getOperand(0);
EVT VT = N->getValueType(0);
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
VT.isFixedLengthVector() &&
InVal->getOperand(0).getValueType().isFixedLengthVector()) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
auto InVecT = InVec.getValueType();
if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
// If we have an implicit truncate, do the truncation here as long as it's
// legal; if it's not legal, fall through to the handling below.
if (VT.getScalarType() != InVal.getValueType() &&
InVal.getValueType().isScalarInteger() &&
isTypeLegal(VT.getScalarType())) {
SDValue Val =
DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
}
if (VT.getScalarType() == InVecT.getScalarType() &&
VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
SDValue LegalShuffle =
TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
DAG.getUNDEF(InVecT), NewMask, DAG);
if (LegalShuffle) {
// If the initial vector is the correct size this shuffle is a
// valid result.
if (VT == InVecT)
return LegalShuffle;
// If not we must truncate the vector.
if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
InVecT.getVectorElementType(),
VT.getVectorNumElements());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
LegalShuffle, ZeroIdx);
}
}
}
}
}
return SDValue();
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
uint64_t InsIdx = N->getConstantOperandVal(2);
// If inserting an UNDEF, just return the original vector.
if (N1.isUndef())
return N0;
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
// i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
// BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
VT.getVectorElementCount() &&
N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
}
// If both N0 and N1 are bitcast values on which insert_subvector
// would make sense, pull the bitcast through.
// i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
// BITCAST (INSERT_SUBVECTOR N0 N1 N2)
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
SDValue CN0 = N0.getOperand(0);
SDValue CN1 = N1.getOperand(0);
EVT CN0VT = CN0.getValueType();
EVT CN1VT = CN1.getValueType();
if (CN0VT.isVector() && CN1VT.isVector() &&
CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
}
}
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
N0.getOperand(1).getValueType() == N1.getValueType() &&
N0.getOperand(2) == N2)
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
// Eliminate an intermediate insert into an undef vector:
// insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
// insert_subvector undef, X, N2
if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
// Push subvector bitcasts to the output, adjusting the index as we go.
// insert_subvector(bitcast(v), bitcast(s), c1)
// -> bitcast(insert_subvector(v, s, c2))
if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
N1.getOpcode() == ISD::BITCAST) {
SDValue N0Src = peekThroughBitcasts(N0);
SDValue N1Src = peekThroughBitcasts(N1);
EVT N0SrcSVT = N0Src.getValueType().getScalarType();
EVT N1SrcSVT = N1Src.getValueType().getScalarType();
if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
EVT NewVT;
SDLoc DL(N);
SDValue NewIdx;
LLVMContext &Ctx = *DAG.getContext();
ElementCount NumElts = VT.getVectorElementCount();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
NumElts.divideCoefficientBy(Scale));
NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
SDValue Res = DAG.getBitcast(NewVT, N0Src);
Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
return DAG.getBitcast(VT, Res);
}
}
}
// Canonicalize insert_subvector dag nodes.
// Example:
// (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
// -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0), if Idx1 < Idx0
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
N1.getValueType() == N0.getOperand(1).getValueType()) {
unsigned OtherIdx = N0.getConstantOperandVal(2);
if (InsIdx < OtherIdx) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
N0.getOperand(0), N1, N2);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
VT, NewOp, N0.getOperand(1), N0.getOperand(2));
}
}
// If the input vector is a concatenation, and the insert replaces
// one of the pieces, we can optimize into a single concat_vectors.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
N0.getOperand(0).getValueType() == N1.getValueType() &&
N0.getOperand(0).getValueType().isScalableVector() ==
N1.getValueType().isScalableVector()) {
unsigned Factor = N1.getValueType().getVectorMinNumElements();
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
// Simplify source operands based on insertion.
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold (fp_to_fp16 (fp16_to_fp op)) -> op
if (N0->getOpcode() == ISD::FP16_TO_FP)
return N0->getOperand(0);
return SDValue();
}
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
N0.getOperand(0));
}
}
return SDValue();
}
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
DAG.getVectorIdxConstant(0, dl));
if (Res.getValueType() != N->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
return Res;
}
// On a boolean vector an and/or reduction is the same as a umin/umax
// reduction. Convert them if the latter is legal while the former isn't.
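// For example, when every lane is known to be 0 or all-ones (guaranteed by
// the ComputeNumSignBits check below), umin yields 0 exactly when some lane
// is 0 (an AND reduction) and umax yields all-ones exactly when some lane is
// all-ones (an OR reduction), so the two forms compute the same value.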
if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
}
return SDValue();
}
/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = peekThroughBitcasts(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
if (LegalOperations)
return SDValue();
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
EVT RVT = RHS.getValueType();
unsigned NumElts = RHS.getNumOperands();
// Attempt to create a valid clear mask, splitting the mask into
// sub elements and checking to see if each is
// all zeros or all ones - suitable for shuffle masking.
auto BuildClearMask = [&](int Split) {
int NumSubElts = NumElts * Split;
int NumSubBits = RVT.getScalarSizeInBits() / Split;
SmallVector<int, 8> Indices;
for (int i = 0; i != NumSubElts; ++i) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
// X & undef --> 0 (not undef). So this lane must be converted to choose
// from the zero constant vector (same as if the element had all 0-bits).
if (Elt.isUndef()) {
Indices.push_back(i + NumSubElts);
continue;
}
APInt Bits;
if (isa<ConstantSDNode>(Elt))
Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
else if (isa<ConstantFPSDNode>(Elt))
Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
else
return SDValue();
// Extract the sub element from the constant bit mask.
if (DAG.getDataLayout().isBigEndian())
Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
if (Bits.isAllOnesValue())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
SDValue Zero = DAG.getConstant(0, DL, ClearVT);
return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
DAG.getBitcast(ClearVT, LHS),
Zero, Indices));
};
// Determine maximum split level (byte level masking).
int MaxSplit = 1;
if (RVT.getScalarSizeInBits() % 8 == 0)
MaxSplit = RVT.getScalarSizeInBits() / 8;
for (int Split = 1; Split <= MaxSplit; ++Split)
if (RVT.getScalarSizeInBits() % Split == 0)
if (SDValue S = BuildClearMask(Split))
return S;
return SDValue();
}
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// TODO: Remove/replace the extract cost check? If the elements are available
// as scalars, then there may be no extract cost. Should we ask if
// inserting a scalar back into a vector is cheap instead?
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!TLI.isExtractVecEltCheap(VT, Index0) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
// If all lanes but 1 are undefined, no need to splat the scalar result.
// TODO: Keep track of undefs and use that info in the general case.
if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
// bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
// build_vec ..undef, (bo X, Y), undef...
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
Ops[Index0] = ScalarBO;
return DAG.getBuildVector(VT, DL, Ops);
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
// VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
// --> shuffle (VBinOp A, B), Undef, Mask
// This does not require type legality checks because we are creating the
// same types of operations that are in the original sequence. We do have to
// restrict ops like integer div that have immediate UB (eg, div-by-zero)
// though. This code is adapted from the identical transform in instcombine.
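// For example (illustrative):
// add (shuffle A, undef, <1,0,3,2>), (shuffle B, undef, <1,0,3,2>)
// --> shuffle (add A, B), undef, <1,0,3,2>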
if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
Opcode != ISD::UREM && Opcode != ISD::SREM &&
Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
}
// Try to sink a splat shuffle after a binop with a uniform constant.
// This is limited to cases where neither the shuffle nor the constant have
// undefined elements because that could be poison-unsafe or inhibit
// demanded elements analysis. It is further limited to not change a splat
// of an inserted scalar because that may be optimized better by
// load-folding or other target-specific behaviors.
if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf0->getMask());
}
if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
Shuf1->getMask());
}
}
// The following pattern is likely to emerge with vector reduction ops. Moving
// the binary operation ahead of insertion may allow using a narrower vector
// instruction that has better performance than the wide version of the op:
// VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
LHS.getOperand(2) == RHS.getOperand(2) &&
(LHS.hasOneUse() || RHS.hasOneUse())) {
SDValue X = LHS.getOperand(1);
SDValue Y = RHS.getOperand(1);
SDValue Z = LHS.getOperand(2);
EVT NarrowVT = X.getValueType();
if (NarrowVT == Y.getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
}
}
// Make sure all but the first op are undef or constant.
auto ConcatWithConstantOrUndef = [](SDValue Concat) {
return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
return Op.isUndef() ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
});
};
// The following pattern is likely to emerge with vector reduction ops. Moving
// the binary operation ahead of the concat may allow using a narrower vector
// instruction that has better performance than the wide version of the op:
// VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
// concat (VBinOp X, Y), VecC
if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
(LHS.hasOneUse() || RHS.hasOneUse())) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
// This constant folds for operands 1 and up.
ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
RHS.getOperand(i)));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
}
}
if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
return V;
return SDValue();
}
SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2) {
assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If we got a simplified select_cc node back from SimplifySelectCC, then
// break it down into a new SETCC node, and a new SELECT node, and then return
// the SELECT node, since we were called with a SELECT node.
if (SCC.getNode()) {
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
const SDNodeFlags Flags = N0.getNode()->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
SCC.getOperand(4), Flags);
AddToWorklist(SETCC.getNode());
SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
SCC.getOperand(2), SCC.getOperand(3));
SelectNode->setFlags(Flags);
return SelectNode;
}
return SCC;
}
return SDValue();
}
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
// fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
// The select + setcc is redundant, because fsqrt returns NaN for X < 0.
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
// We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
SDValue Sqrt = RHS;
ISD::CondCode CC;
SDValue CmpLHS;
const ConstantFPSDNode *Zero = nullptr;
if (TheSelect->getOpcode() == ISD::SELECT_CC) {
CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
CmpLHS = TheSelect->getOperand(0);
Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
} else {
// SELECT or VSELECT
SDValue Cmp = TheSelect->getOperand(0);
if (Cmp.getOpcode() == ISD::SETCC) {
CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
CmpLHS = Cmp.getOperand(0);
Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
}
}
if (Zero && Zero->isZero() &&
Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
CC == ISD::SETULT || CC == ISD::SETLT)) {
// We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
CombineTo(TheSelect, Sqrt);
return true;
}
}
}
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
// If this is a select from two identical things, try to pull the operation
// through the select.
if (LHS.getOpcode() != RHS.getOpcode() ||
!LHS.hasOneUse() || !RHS.hasOneUse())
return false;
// If this is a load and the token chain is identical, replace the select
// of two loads with a load through a select of the address to load from.
// This triggers in things like "select bool X, 10.0, 123.0" after the FP
// constants have been dropped into the constant pool.
if (LHS.getOpcode() == ISD::LOAD) {
LoadSDNode *LLD = cast<LoadSDNode>(LHS);
LoadSDNode *RLD = cast<LoadSDNode>(RHS);
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
// Be conservative for atomics for the moment
// TODO: This does appear to be legal for unordered atomics (see D66309)
!LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
(LLD->getExtensionType() != RLD->getExtensionType() &&
// The only exception is if one of the extensions is anyext.
LLD->getExtensionType() != ISD::EXTLOAD &&
RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
// We can't produce a CMOV of a TargetFrameIndex since we won't
// generate the address generation required.
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
// The loads must not depend on one another.
if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
return false;
// Check that the select condition doesn't reach either load. If so,
// folding this will induce a cycle into the DAG. If not, this is safe to
// xform, so create a select of the addresses.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
// Always fail if LLD and RLD are not independent. TheSelect is a
// predecessor to all Nodes in question so we need not search past it.
Visited.insert(TheSelect);
Worklist.push_back(LLD);
Worklist.push_back(RLD);
if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
return false;
SDValue Addr;
if (TheSelect->getOpcode() == ISD::SELECT) {
// We cannot do this optimization if any pair of {RLD, LLD} is a
// predecessor to {RLD, LLD, CondNode}. As we've already compared the
// Loads, we only need to check if CondNode is a successor to one of the
// loads. We can further avoid this if there's no use of their chain
// value.
SDNode *CondNode = TheSelect->getOperand(0).getNode();
Worklist.push_back(CondNode);
if ((LLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getSelect(SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
RLD->getBasePtr());
} else { // Otherwise SELECT_CC
// We cannot do this optimization if any pair of {RLD, LLD} is a
// predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
// the Loads, we only need to check if CondLHS/CondRHS is a successor to
// one of the loads. We can further avoid this if there's no use of their
// chain value.
SDNode *CondLHS = TheSelect->getOperand(0).getNode();
SDNode *CondRHS = TheSelect->getOperand(1).getNode();
Worklist.push_back(CondLHS);
Worklist.push_back(CondRHS);
if ((LLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
(RLD->hasAnyUseOfValue(1) &&
SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0),
TheSelect->getOperand(1),
LLD->getBasePtr(), RLD->getBasePtr(),
TheSelect->getOperand(4));
}
SDValue Load;
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
if (!RLD->isDereferenceable())
MMOFlags &= ~MachineMemOperand::MODereferenceable;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
MMOFlags);
} else {
// FIXME: Discards pointer and AA info.
Load = DAG.getExtLoad(
LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
: LLD->getExtensionType(),
SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
}
// Users of the select now use the result of the load.
CombineTo(TheSelect, Load);
// Users of the old loads now use the new load's chain. We know the
// old-load value is dead now.
CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
return true;
}
return false;
}
/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// bitwise 'and'.
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// If this is a select where the false operand is zero and the compare is a
// check of the sign bit, see if we can perform the "gzip trick":
// select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
// select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
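// Worked example (illustrative, i32): for "select_cc setlt X, 0, A, 0",
// (sra X, 31) is all-ones when X < 0 and zero otherwise, so ANDing it with A
// yields A for negative X and 0 otherwise -- exactly the select result.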
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (!isNullConstant(N3) || !XType.bitsGE(AType))
return SDValue();
// If the comparison is testing for a positive value, we have to invert
// the sign bit mask, so only do that transform if the target has a bitwise
// 'and not' instruction (the invert is free).
if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
// (X > -1) ? A : 0
// (X > 0) ? X : 0 <-- This is canonical signed max.
if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
return SDValue();
} else if (CC == ISD::SETLT) {
// (X < 0) ? A : 0
// (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
return SDValue();
} else {
return SDValue();
}
// and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
// constant.
EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorklist(Shift.getNode());
}
if (CC == ISD::SETGT)
Shift = DAG.getNOT(DL, Shift, AType);
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
}
unsigned ShCt = XType.getSizeInBits() - 1;
if (TLI.shouldAvoidTransformToShift(XType, ShCt))
return SDValue();
SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
if (XType.bitsGT(AType)) {
Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
AddToWorklist(Shift.getNode());
}
if (CC == ISD::SETGT)
Shift = DAG.getNOT(DL, Shift, AType);
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned BinOpc = N1.getOpcode();
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
return SDValue();
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
}
// Fold select(cond, binop(x, y), binop(x, z))
// --> binop(x, select(cond, y, z))
// Second op VT might be different (e.g. shift amount type)
if (N1.getOperand(0) == N2.getOperand(0) &&
VT == N1.getOperand(1).getValueType() &&
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
}
// TODO: Handle isCommutativeBinOp patterns as well?
return SDValue();
}
// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
bool IsFabs = N->getOpcode() == ISD::FABS;
bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
return SDValue();
SDValue Int = N0.getOperand(0);
EVT IntVT = Int.getValueType();
// The operand to cast should be integer.
if (!IntVT.isInteger() || IntVT.isVector())
return SDValue();
// (fneg (bitconvert x)) -> (bitconvert (xor x sign))
// (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
APInt SignMask;
if (N0.getValueType().isVector()) {
// For vector, create a sign mask (0x80...) or its inverse (for fabs,
// 0x7f...) per element and splat it.
SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
if (IsFabs)
SignMask = ~SignMask;
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
SignMask = APInt::getSignMask(IntVT.getSizeInBits());
if (IsFabs)
SignMask = ~SignMask;
}
SDLoc DL(N0);
Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getBitcast(VT, Int);
}
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
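/// Illustrative sketch (assuming 4-byte floats): the pool entry built below
/// holds { FV, TV }, so the emitted sequence is roughly
///   offset = (a cond b) ? sizeof(float) : 0
///   result = load(tmp + offset)
/// i.e. a true condition selects the second array element, the "true" value.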
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
return SDValue();
// If we are before legalize types, we want the other legalization to happen
// first (for example, to avoid messing with soft float).
auto *TV = dyn_cast<ConstantFPSDNode>(N2);
auto *FV = dyn_cast<ConstantFPSDNode>(N3);
EVT VT = N2.getValueType();
if (!TV || !FV || !TLI.isTypeLegal(VT))
return SDValue();
// If a constant can be materialized without loads, this does not make sense.
if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
return SDValue();
// If both constants have multiple uses, then we won't need to do an extra
// load. The values are likely around in registers for other users.
if (!TV->hasOneUse() && !FV->hasOneUse())
return SDValue();
Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
const_cast<ConstantFP*>(TV->getConstantFPValue()) };
Type *FPTy = Elts[0]->getType();
const DataLayout &TD = DAG.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
TD.getPrefTypeAlign(FPTy));
Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
// Get offsets to the 0 and 1 elements of the array, so we can select between
// them.
SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
SDValue Cond =
DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
AddToWorklist(Cond.getNode());
SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
AddToWorklist(CstOffset.getNode());
CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
AddToWorklist(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(
DAG.getMachineFunction()), Alignment);
}
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
EVT CmpOpVT = N0.getValueType();
EVT CmpResVT = getSetCCResultType(CmpOpVT);
EVT VT = N2.getValueType();
auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant.
if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
AddToWorklist(SCC.getNode());
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
return !(SCCC->isNullValue()) ? N2 : N3;
}
}
if (SDValue V =
convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
return V;
if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
return V;
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y has a single bit set.
// In plain words, we can turn the SELECT_CC into an AND
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
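// Worked example (illustrative, i32, y = 0x10): countLeadingZeros(y) == 27,
// so (shl x, 27) moves the tested bit 4 into the sign bit; (sra ..., 31) then
// yields all-ones when that bit was set and zero otherwise, and ANDing with A
// gives A or 0 -- the same result as the original select_cc.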
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
unsigned ShCt = AndMask.getBitWidth() - 1;
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue ShlAmt =
DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is
// either all-ones, or zero.
SDValue ShrAmt =
DAG.getConstant(ShCt, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
}
}
}
// fold select C, 16, 0 -> shl C, 4
bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
if ((Fold || Swap) &&
TLI.getBooleanContents(CmpOpVT) ==
TargetLowering::ZeroOrOneBooleanContent &&
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
if (Swap) {
CC = ISD::getSetCCInverse(CC, CmpOpVT);
std::swap(N2C, N3C);
}
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
if (NotExtCompare && N2C->isOne())
return SDValue();
SDValue Temp, SCC;
// zext (setcc n0, n1)
if (LegalTypes) {
SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
if (VT.bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
else
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
}
AddToWorklist(SCC.getNode());
AddToWorklist(Temp.getNode());
if (N2C->isOne())
return Temp;
unsigned ShCt = N2C->getAPIntValue().logBase2();
if (TLI.shouldAvoidTransformToShift(VT, ShCt))
return SDValue();
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
DAG.getConstant(ShCt, SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
// select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
// select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
if (CC == ISD::SETNE)
std::swap(ValueOnZero, Count);
// Check if the value on zero is a constant equal to the bits in the type.
if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
// If the other operand is cttz/cttz_zero_undef of N0, and cttz is
// legal, combine to just cttz.
if ((Count.getOpcode() == ISD::CTTZ ||
Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
N0 == Count.getOperand(0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
return DAG.getNode(ISD::CTTZ, DL, VT, N0);
// If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
// legal, combine to just ctlz.
if ((Count.getOpcode() == ISD::CTLZ ||
Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
N0 == Count.getOperand(0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
return DAG.getNode(ISD::CTLZ, DL, VT, N0);
}
}
}
return SDValue();
}
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
bool foldBooleans) {
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
return SDValue();
// Avoid division by zero.
if (C->isNullValue())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
for (SDNode *N : Built)
AddToWorklist(N);
return S;
}
return SDValue();
}
/// Determines the LogBase2 value for a non-zero input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
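/// For example (illustrative, i32): V = 16 gives ctlz(V) = 27, so
/// LogBase2(V) = 31 - 27 = 4.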
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
EVT VT = V.getValueType();
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
return LogBase2;
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
/// F(X) = A X - 1 [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
/// Result = N X_i + X_i (N - N A X_i)
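/// Illustrative numeric example (A = 4, X_0 = 0.2):
///   X_1 = 0.2 * (2 - 4 * 0.2) = 0.24
///   X_2 = 0.24 * (2 - 4 * 0.24) = 0.2496
/// converging quadratically towards 1/A = 0.25.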
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
SDNodeFlags Flags) {
if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
EVT VT = Op.getValueType();
if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
MachineFunction &MF = DAG.getMachineFunction();
int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
if (Enabled == TLI.ReciprocalEstimate::Disabled)
return SDValue();
// Estimates may be explicitly enabled for this type with a custom number of
// refinement steps.
int Iterations = TLI.getDivRefinementSteps(VT, MF);
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
SDLoc DL(Op);
if (Iterations) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
// Newton iterations: Est = Est + Est (N - Arg * Est)
// If this is the last iteration, also multiply by the numerator.
for (int i = 0; i < Iterations; ++i) {
SDValue MulEst = Est;
if (i == Iterations - 1) {
MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
AddToWorklist(MulEst.getNode());
}
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FSUB, DL, VT,
(i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
} else {
// If no iterations are available, multiply with N.
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
AddToWorklist(Est.getNode());
}
return Est;
}
return SDValue();
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
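/// Illustrative numeric example (A = 4, so A/2 = 2; X_0 = 0.6):
///   X_1 = 0.6 * (1.5 - 2 * 0.6^2) = 0.6 * 0.78 = 0.468
///   X_2 = 0.468 * (1.5 - 2 * 0.468^2) ~= 0.468 * 1.062 ~= 0.497
/// converging towards 1/sqrt(4) = 0.5.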
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
}
// If non-reciprocal square root is requested, multiply the result by Arg.
if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
return Est;
}
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
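/// Illustrative numeric example (A = 4, X_0 = 0.6):
///   X_1 = (-0.5 * 0.6) * (4 * 0.6 * 0.6 - 3.0) = (-0.3) * (-1.56) = 0.468
/// which matches the algebraically equivalent one-constant form above.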
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// This routine must enter the loop below to work correctly
// when (Reciprocal == false).
assert(Iterations > 0);
// Newton iterations for reciprocal square root:
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
// (notice a common subexpression)
SDValue LHS;
if (Reciprocal || (i + 1) < Iterations) {
// RSQRT: LHS = (E * -0.5)
LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
} else {
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
}
return Est;
}
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
EVT VT = Op.getValueType();
if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
MachineFunction &MF = DAG.getMachineFunction();
int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
if (Enabled == TLI.ReciprocalEstimate::Disabled)
return SDValue();
// Estimates may be explicitly enabled for this type with a custom number of
// refinement steps.
int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
bool UseOneConstNR = false;
if (SDValue Est =
TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
Reciprocal)) {
AddToWorklist(Est.getNode());
if (Iterations)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target specific test first.
SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 or value provided by
// target for those cases.
Est = DAG.getNode(
Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
}
return Est;
}
return SDValue();
}
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, true);
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
MachineMemOperand *MMO;
};
auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
int64_t Offset = 0;
if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
? C->getSExtValue()
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
uint64_t Size =
MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /*base offset*/,
Optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
(int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
MemUseCharacteristics MUC0 = getCharacteristics(Op0),
MUC1 = getCharacteristics(Op1);
// If they are to the same address, then they must be aliases.
if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
MUC0.Offset == MUC1.Offset)
return true;
// If they are both volatile then they cannot be reordered.
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
// Be conservative about atomics for the moment
// TODO: This is way overconservative for unordered atomics (see D66309)
if (MUC0.IsAtomic && MUC1.IsAtomic)
return true;
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
}
// Try to prove that there is aliasing, or that there is no aliasing. Either
// way, we can return now. If nothing can be proved, proceed with more tests.
bool IsAlias;
if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
DAG, IsAlias))
return IsAlias;
// The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
// either are not known.
if (!MUC0.MMO || !MUC1.MMO)
return true;
// If one operation reads from invariant memory, and the other may store, they
// cannot alias. These should really be checking the equivalent of mayWrite,
// but it only matters for memory nodes other than load/store.
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types, it only works when the offsets are
// multiples of the size of the data.
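// Illustrative example: two simple 4-byte accesses from the same 8-byte
// aligned base at source offsets 0 and 4 give OffAlign0 = 0 and
// OffAlign1 = 4, so (OffAlign0 + 4) <= OffAlign1 holds and the accesses
// provably do not overlap.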
int64_t SrcValOffset0 = MUC0.MMO->getOffset();
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
return false;
}
bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
? CombinerGlobalAA
: DAG.getSubtarget().useAA();
#ifndef NDEBUG
if (CombinerAAOnlyFunc.getNumOccurrences() &&
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
UseAA = false;
#endif
if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
if (AA->isNoAlias(
MemoryLocation(MUC0.MMO->getValue(), Overlap0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
MemoryLocation(MUC1.MMO->getValue(), Overlap1,
UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
return false;
}
// Otherwise we have to assume they alias.
return true;
}
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases) {
SmallVector<SDValue, 8> Chains; // List of chains to visit.
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
// TODO: relax aliasing for unordered atomics (see D66309)
const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
// Attempt to improve chain by a single step
std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
switch (C.getOpcode()) {
case ISD::EntryToken:
// No need to mark EntryToken.
C = SDValue();
return true;
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
}
// Alias, so stop here.
return false;
}
case ISD::CopyFromReg:
// Always forward past CopyFromReg.
C = C.getOperand(0);
return true;
case ISD::LIFETIME_START:
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
if (!isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
}
return false;
}
default:
return false;
}
};
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
// Don't bother if we've seen Chain before.
if (!Visited.insert(Chain.getNode()).second)
continue;
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;
}
if (Chain.getOpcode() == ISD::TokenFactor) {
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE).
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
continue;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
continue;
}
// Everything else
if (ImproveChain(Chain)) {
// Updated Chain Found, Consider new chain if one exists.
if (Chain.getNode())
Chains.push_back(Chain);
++Depth;
continue;
}
// No Improved Chain Possible, treat as Alias.
Aliases.push_back(Chain);
}
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
if (OptLevel == CodeGenOpt::None)
return OldChain;
// Ops for replacing token factor.
SmallVector<SDValue, 8> Aliases;
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
if (Aliases.size() == 0)
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
if (Aliases.size() == 1)
return Aliases[0];
// Construct a custom tailored token factor.
return DAG.getTokenFactor(SDLoc(N), Aliases);
}
namespace {
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because mergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running mergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SmallVector<StoreSDNode *, 8> ChainedStores;
StoreSDNode *STChain = St;
// Intervals records which offsets from BaseIndex have been covered. In
// the common case, every store writes to the address immediately after the
// previous one and is thus merged with the previous interval at insertion
// time.
using IMap =
llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
IMap::Allocator A;
IMap Intervals(A);
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())
return false;
// Do not handle stores to opaque types
if (St->getMemoryVT().isZeroSized())
return false;
// BaseIndexOffset assumes that offsets are fixed-size, which
// is not valid for scalable vectors where the offsets are
// scaled by `vscale`, so bail out early.
if (St->getMemoryVT().isScalableVector())
return false;
// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
if (Chain->getMemoryVT().isScalableVector())
return false;
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
// TODO: Relax for unordered atomics (see D66309)
if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
// Check that the base pointer is the same as the original one.
int64_t Offset;
if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
break;
int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
// Make sure we don't overlap with other intervals by checking the ones to
// the left or right before inserting.
auto I = Intervals.find(Offset);
// If there's a next interval, we should end before it.
if (I != Intervals.end() && I.start() < (Offset + Length))
break;
// If there's a previous interval, we should start after it.
if (I != Intervals.begin() && (--I).stop() <= Offset)
break;
Intervals.insert(Offset, Offset + Length, Unit);
ChainedStores.push_back(Chain);
STChain = Chain;
}
// If we didn't find a chained store, exit.
if (ChainedStores.size() == 0)
return false;
// Improve all chained stores (St and ChainedStores members) starting from
// where the store chain ended and return single TokenFactor.
SDValue NewChain = STChain->getChain();
SmallVector<SDValue, 8> TFOps;
for (unsigned I = ChainedStores.size(); I;) {
StoreSDNode *S = ChainedStores[--I];
SDValue BetterChain = FindBetterChain(S, NewChain);
S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
TFOps.push_back(SDValue(S, 0));
ChainedStores[I] = S;
}
// Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
SDValue BetterChain = FindBetterChain(St, NewChain);
SDValue NewST;
if (St->isTruncatingStore())
NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemoryVT(),
St->getMemOperand());
else
NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
St->getBasePtr(), St->getMemOperand());
TFOps.push_back(NewST);
// If we improved every element of TFOps, then we've lost the dependence on
// NewChain to successors of St and we need to add it back to TFOps. Do so at
// the beginning to keep relative order consistent with FindBetterChains.
auto hasImprovedChain = [&](SDValue ST) -> bool {
return ST->getOperand(0) != NewChain;
};
bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
// Add TF and its operands to the worklist.
AddToWorklist(TF.getNode());
for (const SDValue &Op : TF->ops())
AddToWorklist(Op.getNode());
AddToWorklist(STChain);
return true;
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())
return false;
// Directly improve a chain of disjoint stores starting at St.
if (parallelizeChainedStores(St))
return true;
// Improve St's Chain.
SDValue BetterChain = FindBetterChain(St, St->getChain());
if (St->getChain() != BetterChain) {
replaceStoreChain(St, BetterChain);
return true;
}
return false;
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..328e9430d635 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1,5027 +1,5030 @@
//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements integer type expansion and promotion for LegalizeTypes.
// Promotion is the act of changing a computation in an illegal type into a
// computation in a larger type. For example, implementing i8 arithmetic in an
// i32 register (often needed on powerpc).
// Expansion is the act of changing a computation in an illegal type into a
// computation in two identical registers of a smaller type. For example,
// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
// targets).
//
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
//===----------------------------------------------------------------------===//
// Integer Result Promotion
//===----------------------------------------------------------------------===//
/// PromoteIntegerResult - This method is called when a result of a node is
/// found to be in need of promotion to a larger type. At this point, the node
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
}
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator!");
case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::PARITY:
case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
break;
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
break;
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break;
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR:
Res = PromoteIntRes_INSERT_SUBVECTOR(N); break;
case ISD::VECTOR_REVERSE:
Res = PromoteIntRes_VECTOR_REVERSE(N); break;
case ISD::VECTOR_SHUFFLE:
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::VECTOR_SPLICE:
Res = PromoteIntRes_VECTOR_SPLICE(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR:
Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::SDIV:
case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UDIV:
case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
case ISD::UADDO:
case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
case ISD::ADDE:
case ISD::SUBE:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_SWAP:
Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Res = PromoteIntRes_VECREDUCE(N);
break;
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
case ISD::ROTL:
case ISD::ROTR:
Res = PromoteIntRes_Rotate(N);
break;
case ISD::FSHL:
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
}
// If the result is null then the sub-method took care of registering it.
if (Res.getNode())
SetPromotedInteger(SDValue(N, ResNo), Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
return GetPromotedInteger(Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
// Sign-extend the new bits, and continue the assertion.
SDValue Op = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::AssertSext, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
// Zero the new bits, and continue the assertion.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::AssertZext, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
Op2, N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
unsigned ResNo) {
if (ResNo == 1) {
assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
// Only use the result of getSetCCResultType if it is legal,
// otherwise just use the promoted result type (NVT).
if (!TLI.isTypeLegal(SVT))
SVT = NVT;
SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
return Res.getValue(1);
}
// Op2 is used for the comparison and thus must be extended according to the
// target's atomic operations. Op3 is merely stored and so can be left alone.
SDValue Op2 = N->getOperand(2);
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
switch (TLI.getExtendForAtomicCmpSwapArg()) {
case ISD::SIGN_EXTEND:
Op2 = SExtPromotedInteger(Op2);
break;
case ISD::ZERO_EXTEND:
Op2 = ZExtPromotedInteger(Op2);
break;
case ISD::ANY_EXTEND:
Op2 = GetPromotedInteger(Op2);
break;
default:
llvm_unreachable("Invalid atomic op extension");
}
SDVTList VTs =
DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
N->getBasePtr(), Op2, Op3, N->getMemOperand());
// Update the use to N with the newly created Res.
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
ReplaceValueWith(SDValue(N, i), Res.getValue(i));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDLoc dl(N);
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
case TargetLowering::TypeSoftPromoteHalf:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
if (!NOutVT.isVector())
return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, GetPromotedFloat(InOp));
break;
}
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
break;
case TargetLowering::TypeScalarizeVector:
// Convert the element to an integer and promote it by hand.
if (!NOutVT.isVector())
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
case TargetLowering::TypeScalarizeScalableVector:
report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeSplitVector: {
if (!NOutVT.isVector()) {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
Lo = BitConvertToInteger(Lo);
Hi = BitConvertToInteger(Hi);
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
break;
}
case TargetLowering::TypeWidenVector:
// The input is widened to the same size. Convert to the widened value.
// Make sure that the outgoing value is not a vector, because this would
// make us bitcast between two vectors which are legalized in different ways.
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) {
SDValue Res =
DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
// For big endian targets we need to shift the casted value or the
// interesting bits will end up at the wrong place.
if (DAG.getDataLayout().isBigEndian()) {
unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
}
return Res;
}
// If the output type is also a vector and widening it to the same size
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
if (NOutVT.isVector()) {
unsigned WidenInSize = NInVT.getSizeInBits();
unsigned OutSize = OutVT.getSizeInBits();
if (WidenInSize % OutSize == 0) {
unsigned Scale = WidenInSize / OutSize;
EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
OutVT.getVectorElementType(),
OutVT.getVectorNumElements() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
DAG.getVectorIdxConstant(0, dl));
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
}
}
}
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
CreateStackStoreLoad(InOp, OutVT));
}
// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
// If any possible shift value won't fit in the preferred type, just use
// something safe. It will be legalized when the shift is expanded.
if (!ShiftVT.isVector() &&
ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
ShiftVT = MVT::i32;
return ShiftVT;
}
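// The check above compares the preferred shift type's width against
// Log2_32_Ceil(VT.getSizeInBits()), the smallest number of bits that can hold
// any in-range shift amount for VT. A small standalone sketch of that quantity
// with concrete widths; the helper name is illustrative only.
constexpr unsigned shiftAmountBits(unsigned Bits) {
  // Smallest N with 2^N >= Bits, i.e. enough bits to encode 0 .. Bits-1.
  unsigned N = 0;
  while ((1u << N) < Bits)
    ++N;
  return N;
}

static_assert(shiftAmountBits(256) == 8, "an i256 shift amount goes up to 255");
static_assert(shiftAmountBits(32) == 5, "an i32 shift amount goes up to 31");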
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
V.getValueType(), V);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
// If the larger BSWAP isn't supported by the target, try to expand now.
// If we expand later we'll end up with more operations since we lost the
// original type. We only do this for scalars since we have a shuffle
// based lowering for vectors in LegalizeVectorOps.
if (!OVT.isVector() &&
!TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) {
if (SDValue Res = TLI.expandBSWAP(N, DAG))
return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
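// Worked example of the promotion above with concrete widths: an i16 BSWAP
// carried in an i32 register swaps all four bytes and then shifts the result
// right by DiffBits (here 16) to bring the interesting bytes back down. A
// minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Portable byte swap of a 32-bit value (ISD::BSWAP on the promoted type).
constexpr uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) |
         ((V << 8) & 0x00FF0000u) | (V << 24);
}

// The upper 16 bits of In are "don't care" (any-extend of the i16 operand).
constexpr uint32_t bswap16_via_i32(uint32_t In) {
  return bswap32(In) >> 16; // ISD::SRL by DiffBits = 32 - 16
}

static_assert(bswap16_via_i32(0xABCD1234u) == 0x3412u,
              "low half is bswap16(0x1234), whatever the upper bits were");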
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
// If the larger BITREVERSE isn't supported by the target, try to expand now.
// If we expand later we'll end up with more operations since we lost the
// original type. We only do this for scalars since we have a shuffle
// based lowering for vectors in LegalizeVectorOps.
if (!OVT.isVector() && OVT.isSimple() &&
!TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) {
if (SDValue Res = TLI.expandBITREVERSE(N, DAG))
return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res);
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N),
TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)), JoinIntegers(N->getOperand(0),
N->getOperand(1)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
EVT VT = N->getValueType(0);
// FIXME there is no actual debug info here
SDLoc dl(N);
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Result = DAG.getNode(Opc, dl,
TLI.getTypeToTransformTo(*DAG.getContext(), VT),
SDValue(N, 0));
assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
return Result;
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
SDLoc dl(N);
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
NVT));
}
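// Worked example of the subtraction above with concrete widths: counting
// leading zeros of an i8 value in an i32 register adds 24 extra leading
// zeros, which have to be subtracted back off. A minimal standalone sketch;
// the helper names are illustrative only.
#include <cstdint>

// Leading-zero count of a 32-bit value (32 for zero).
constexpr unsigned ctlz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit != 0 && (V & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

constexpr unsigned ctlz8_via_i32(uint8_t V) {
  // Zero-extend, count in 32 bits, drop the 24 zeros added by the extension.
  return ctlz32(static_cast<uint32_t>(V)) - 24;
}

static_assert(ctlz8_via_i32(0x10) == 3, "0b00010000 has three leading zeros");
static_assert(ctlz8_via_i32(0x00) == 8, "all eight original bits are zero");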
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
}
return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
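// Worked example of the CTTZ fix-up above with concrete widths: OR-ing in the
// bit just above the original width (bit 8 for an i8 operand) caps the count
// at 8 when the original value is zero and changes nothing otherwise. A
// minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Trailing-zero count of a 32-bit value (32 for zero).
constexpr unsigned cttz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 1; Bit != 0 && (V & Bit) == 0; Bit <<= 1)
    ++N;
  return N;
}

constexpr unsigned cttz8_via_i32(uint8_t V) {
  return cttz32(static_cast<uint32_t>(V) | 0x100u); // set the "top bit" guard
}

static_assert(cttz8_via_i32(0x10) == 4, "guard bit sits above the answer");
static_assert(cttz8_via_i32(0x00) == 8, "zero input stops at the guard bit");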
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// If the input also needs to be promoted, do that first so we can get a
// good idea for the output type.
if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue In = GetPromotedInteger(Op0);
// If the new type is larger than NVT, use it. We probably won't need to
// promote it again.
EVT SVT = In.getValueType().getScalarType();
if (SVT.bitsGE(NVT)) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
}
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NewOpc = N->getOpcode();
SDLoc dl(N);
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
// and SINT conversions are Custom, there is no way to tell which is
// preferable. We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
{N->getOperand(0), N->getOperand(1)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
} else
Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
//
// NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
// Promote the result type, while keeping the original width in Op1.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Res =
DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
if (getTypeAction(N->getOperand(0).getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue Res = GetPromotedInteger(N->getOperand(0));
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
// If the result and operand types are the same after promotion, simplify
// to an in-register extension.
if (NVT == Res.getValueType()) {
// The high bits are not guaranteed to be anything. Insert an extend.
if (N->getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
}
// Otherwise, just extend the original operand all the way to the larger type.
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
SDLoc dl(N);
SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
N->getMemoryVT(), N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
N->getAddressingMode(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru argument type should be the same");
ISD::LoadExtType ExtType = N->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
ExtType = ISD::EXTLOAD;
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand(), N->getIndexType(),
ExtType);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Change the return type of the boolean result while obeying
// getSetCCResultType.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
EVT VT = N->getValueType(0);
EVT SVT = getSetCCResultType(VT);
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
unsigned NumOps = N->getNumOperands();
assert(NumOps <= 3 && "Too many operands");
if (NumOps == 3)
Ops[2] = N->getOperand(2);
SDLoc dl(N);
SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
makeArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
ReplaceValueWith(SDValue(N, 0), Res);
// Convert to the expected type.
return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
// 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
// Else it is more efficient to convert this to a min and a max
// operation in the higher precision arithmetic.
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
SDValue Op1Promoted, Op2Promoted;
if (IsShift) {
Op1Promoted = GetPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
Op1Promoted = ZExtPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else {
Op1Promoted = SExtPromotedInteger(Op1);
Op2Promoted = SExtPromotedInteger(Op2);
}
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
}
// USUBSAT can always be promoted as long as we have zero-extended the args.
if (Opcode == ISD::USUBSAT)
return DAG.getNode(ISD::USUBSAT, dl, PromotedType, Op1Promoted,
Op2Promoted);
// Shift cannot use a min/max expansion; we can't detect overflow if all of
// the bits have been shifted out.
if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::SSHLSAT:
ShiftOp = ISD::SRA;
break;
case ISD::USHLSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
"addition, subtraction or left shift");
}
unsigned SHLAmount = NewBits - OldBits;
EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
if (!IsShift)
Op2Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Result =
DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
return Result;
}
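// Worked example of the shift-based path above (the case where the wider
// saturating operation is available) with concrete widths: shift both i8
// operands into the top byte of an i32, saturate there, then shift back down.
// A minimal standalone sketch; the helper names are illustrative only.
#include <cstdint>

// Saturating signed 32-bit add, standing in for the promoted-type SADDSAT.
constexpr int32_t sadd_sat_i32(int32_t A, int32_t B) {
  int64_t S = static_cast<int64_t>(A) + B;
  return S > INT32_MAX ? INT32_MAX
         : S < INT32_MIN ? INT32_MIN : static_cast<int32_t>(S);
}

constexpr int8_t sadd_sat_i8_via_i32(int8_t A, int8_t B) {
  // Multiply by 2^24 (a well-defined spelling of "<< 24" for negatives):
  // after the shift, the i32 saturation bounds line up with the i8 bounds.
  int32_t Wide = sadd_sat_i32(A * (1 << 24), B * (1 << 24));
  return static_cast<int8_t>(Wide >> 24); // SRA back down by 24
}

static_assert(sadd_sat_i8_via_i32(100, 100) == 127, "clamped to INT8_MAX");
static_assert(sadd_sat_i8_via_i32(-100, -100) == -128, "clamped to INT8_MIN");
static_assert(sadd_sat_i8_via_i32(3, 4) == 7, "the non-saturating case");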
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
bool Signed =
N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
bool Saturating =
N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
} else {
Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
}
EVT OldType = N->getOperand(0).getValueType();
EVT PromotedType = Op1Promoted.getValueType();
unsigned DiffSize =
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
if (Saturating) {
// Promoting the operand and result values changes the saturation width,
// which extends the values that we clamp to on saturation. This could be
// resolved by shifting one of the operands the same amount, which would
// also shift the result we compare against, then shifting back.
EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
DAG.getConstant(DiffSize, dl, ShiftTy));
SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOp, dl, PromotedType, Result,
DAG.getConstant(DiffSize, dl, ShiftTy));
}
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
}
static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
unsigned SatW, bool Signed,
const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT VT = V.getValueType();
unsigned VTW = VT.getScalarSizeInBits();
if (!Signed) {
// Saturate to the unsigned maximum by getting the minimum of V and the
// maximum.
return DAG.getNode(ISD::UMIN, dl, VT, V,
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
dl, VT));
}
// Saturate to the signed maximum (the low SatW - 1 bits) by taking the
// signed minimum of it and V.
V = DAG.getNode(ISD::SMIN, dl, VT, V,
DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
dl, VT));
// Saturate to the signed minimum (the high SatW + 1 bits) by taking the
// signed maximum of it and V.
V = DAG.getNode(ISD::SMAX, dl, VT, V,
DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
dl, VT));
return V;
}
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
unsigned Scale, const TargetLowering &TLI,
SelectionDAG &DAG, unsigned SatW = 0) {
EVT VT = LHS.getValueType();
unsigned VTSize = VT.getScalarSizeInBits();
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
N->getOpcode() == ISD::SDIVFIXSAT;
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
N->getOpcode() == ISD::UDIVFIXSAT;
SDLoc dl(N);
// Widen the types by a factor of two. This is guaranteed to expand, since it
// will always have enough high bits in the LHS to shift into.
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorElementCount());
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
}
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
if (Saturating) {
// If the caller has told us to saturate at something less, use that width
// instead of the type before doubling. However, it cannot be more than
// what we just widened!
assert(SatW <= VTSize &&
"Tried to saturate to more than the original type?");
Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
TLI, DAG);
}
return DAG.getZExtOrTrunc(Res, dl, VT);
}
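// Worked example of the widening idea above with concrete widths: a
// fixed-point divide with scale 4 computes (LHS << 4) / RHS, and doing that
// on i8 operands in an i16 register guarantees the shift cannot lose bits.
// A minimal standalone sketch of the unsigned, non-saturating case; the
// helper name is illustrative only.
#include <cstdint>

constexpr uint8_t udivfix8_scale4(uint8_t LHS, uint8_t RHS) {
  uint16_t WideLHS = static_cast<uint16_t>(LHS) << 4; // shift into the new high bits
  uint16_t WideRHS = RHS;
  return static_cast<uint8_t>(WideLHS / WideRHS);
}

// In Q4.4 fixed point, 0x18 is 1.5 and 0x08 is 0.5; 1.5 / 0.5 is 3.0 = 0x30.
static_assert(udivfix8_scale4(0x18, 0x08) == 0x30, "");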
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
bool Signed = N->getOpcode() == ISD::SDIVFIX ||
N->getOpcode() == ISD::SDIVFIXSAT;
bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
N->getOpcode() == ISD::UDIVFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
} else {
Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
}
EVT PromotedType = Op1Promoted.getValueType();
unsigned Scale = N->getConstantOperandVal(2);
// If the type is already legal and the operation is legal in that type, we
// should not early expand.
if (TLI.isTypeLegal(PromotedType)) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
unsigned Diff = PromotedType.getScalarSizeInBits() -
N->getValueType(0).getScalarSizeInBits();
if (Saturating)
Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
DAG.getConstant(Diff, dl, ShiftTy));
SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
DAG.getConstant(Diff, dl, ShiftTy));
return Res;
}
}
// See if we can perform the division in this type without expanding.
if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
Op2Promoted, Scale, DAG)) {
if (Saturating)
Res = SaturateWidenedDIVFIX(Res, dl,
N->getValueType(0).getScalarSizeInBits(),
Signed, TLI, DAG);
return Res;
}
// If we cannot, expand it to twice the type width. If we are saturating, give
// it the original width as a saturating width so we don't need to emit
// two saturations.
return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
N->getValueType(0).getScalarSizeInBits());
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// The operation overflowed iff the result in the larger type is not the
// sign extension of its truncation to the original type.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = LHS.getValueType();
SDLoc dl(N);
// Do the arithmetic in the larger type.
unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
// Calculate the overflow flag: sign extend the arithmetic result from
// the original type.
SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(OVT));
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
return Res;
}
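// Worked example of the overflow test above with concrete widths: the i8
// signed add is done in i32, and it overflowed exactly when the wide sum is
// not the sign extension of its own low 8 bits. A minimal standalone sketch;
// the helper names are illustrative only (two's complement assumed).
#include <cstdint>

// ISD::SIGN_EXTEND_INREG of the low 8 bits of a 32-bit value.
constexpr int32_t sext_inreg_i8(int32_t V) {
  return static_cast<int32_t>(static_cast<int8_t>(V & 0xFF));
}

constexpr bool saddo_i8_via_i32(int8_t A, int8_t B) {
  int32_t Res = static_cast<int32_t>(A) + static_cast<int32_t>(B);
  return sext_inreg_i8(Res) != Res; // the SETNE above
}

static_assert(saddo_i8_via_i32(100, 100), "200 does not fit in i8");
static_assert(!saddo_i8_via_i32(50, 20), "70 fits in i8");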
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
SDValue Mask = N->getOperand(0);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
LHS.getValueType(), Mask, LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(2));
SDValue RHS = GetPromotedInteger(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
LHS.getValueType(), N->getOperand(0),
N->getOperand(1), LHS, RHS, N->getOperand(4));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
EVT InVT = N->getOperand(OpNo).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT SVT = getSetCCResultType(InVT);
// If we got back a type that needs to be promoted, this likely means the
// input type also needs to be promoted. So get the promoted type for
// the input and try the query again.
if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
SVT = getSetCCResultType(InVT);
} else {
// Input type isn't promoted, just use the default promoted type.
SVT = NVT;
}
}
SDLoc dl(N);
assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
"Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
SDValue SetCC;
if (N->isStrictFPOpcode()) {
EVT VTs[] = {SVT, MVT::Other};
SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)};
SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
} else
SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N),
Op.getValueType(), Op, N->getOperand(1));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
// It doesn't matter if we sign extend or zero extend in the inputs. So do
// whatever is best for the target.
SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N),
LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
// Lower the rotate to shifts and ORs which can be promoted.
SDValue Res;
TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
SDValue Amount = GetPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
EVT VT = Lo.getValueType();
unsigned Opcode = N->getOpcode();
bool IsFSHR = Opcode == ISD::FSHR;
unsigned OldBits = OldVT.getScalarSizeInBits();
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
Amount =
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
// shift amount is constant.
// fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
// fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
if (!IsFSHR)
Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
return Res;
}
// Shift Lo up to occupy the upper bits of the promoted type.
SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
}
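// Worked example of the "double shift" form above with concrete widths: for
// i8 operands in an i32 register, fshl concatenates hi:lo into 16 bits,
// shifts by the amount modulo 8, and takes the byte that lands just above
// the low half. A minimal standalone sketch; the helper name is illustrative.
#include <cstdint>

constexpr uint8_t fshl8_via_i32(uint8_t Hi, uint8_t Lo, uint32_t Amt) {
  uint32_t Concat = (static_cast<uint32_t>(Hi) << 8) | Lo; // (aext(x) << bw) | zext(y)
  return static_cast<uint8_t>((Concat << (Amt % 8)) >> 8);  // << (z % bw), then >> bw
}

// fshl(0xAB, 0xCD, 4): the 16-bit window 0xABCD shifted left by 4 is 0xBCD0,
// whose high byte is 0xBC.
static_assert(fshl8_via_i32(0xAB, 0xCD, 4) == 0xBC, "");
static_assert(fshl8_via_i32(0xAB, 0xCD, 0) == 0xAB, "shift by zero returns Hi");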
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
SDValue InOp = N->getOperand(0);
SDLoc dl(N);
switch (getTypeAction(InOp.getValueType())) {
default: llvm_unreachable("Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypeExpandInteger:
Res = InOp;
break;
case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(InOp);
break;
case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
ElementCount NumElts = InVT.getVectorElementCount();
assert(NumElts == NVT.getVectorElementCount() &&
"Dst and Src must have the same number of elements");
assert(isPowerOf2_32(NumElts.getKnownMinValue()) &&
"Promoted vector type must be a power of two");
SDValue EOp1, EOp2;
GetSplitVector(InOp, EOp1, EOp2);
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts.divideCoefficientBy(2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
case TargetLowering::TypeWidenVector: {
SDValue WideInOp = GetWidenedVector(InOp);
// Truncate widened InOp.
unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
N->getValueType(0).getScalarType(), NumElem);
SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
// Zero extend so that the elements are of same type as those of NVT
EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
NumElem);
SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
// Extract the low NVT subvector.
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
}
}
// Truncate to NVT instead of VT
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// The operation overflowed iff the result in the larger type is not the
// zero extension of its truncation to the original type.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = LHS.getValueType();
SDLoc dl(N);
// Do the arithmetic in the larger type.
unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
return Res;
}
// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
// the ADDCARRY/SUBCARRY nodes in that the third operand is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// We need to sign-extend the operands so the carry value computed by the
// wide operation will be equivalent to the carry value computed by the
// narrow operation.
// An ADDCARRY can generate carry only if any of the operands has its
// most significant bit set. Sign extension propagates the most significant
// bit into the higher bits which means the extra bit that the narrow
// addition would need (i.e. the carry) will be propagated through the higher
// bits of the wide addition.
// A SUBCARRY can generate borrow only if LHS < RHS and this property will be
// preserved by sign extension.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
// Do the arithmetic in the wide type.
SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
LHS, RHS, N->getOperand(2));
// Update the users of the original carry/borrow value.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return SDValue(Res.getNode(), 0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
unsigned ResNo) {
assert(ResNo == 1 && "Don't know how to promote other results yet.");
return PromoteIntRes_Overflow(N);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
// Promote the overflow bit trivially.
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDLoc DL(N);
EVT SmallVT = LHS.getValueType();
// To determine if the result overflowed in a larger type, we extend the
// input to the larger type, do the multiply (checking if it overflows),
// then also check the high bits of the result to see if overflow happened
// there.
if (N->getOpcode() == ISD::SMULO) {
LHS = SExtPromotedInteger(LHS);
RHS = SExtPromotedInteger(RHS);
} else {
LHS = ZExtPromotedInteger(LHS);
RHS = ZExtPromotedInteger(RHS);
}
SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
// Overflow occurred if it occurred in the larger type, or if the high part
// of the result does not zero/sign-extend the low part. Check this second
// possibility first.
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
} else {
// Signed overflow occurred if the high part does not sign extend the low.
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
Mul, DAG.getValueType(SmallVT));
Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
}
// The only other way for overflow to occur is if the multiplication in the
// larger type itself overflowed.
Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
SDValue(Mul.getNode(), 1));
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Overflow);
return Mul;
}
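// Worked example of the unsigned case above with concrete widths: an i8
// UMULO done in i32 overflows exactly when the high bits of the 32-bit
// product are non-zero (the 32-bit multiply itself cannot overflow for
// 8-bit inputs). A minimal standalone sketch; the helper name is
// illustrative only.
#include <cstdint>

constexpr bool umulo_i8_via_i32(uint8_t A, uint8_t B) {
  uint32_t Wide = static_cast<uint32_t>(A) * static_cast<uint32_t>(B);
  return (Wide >> 8) != 0; // the SRL + SETNE pair above
}

static_assert(umulo_i8_via_i32(16, 16), "256 does not fit in i8");
static_assert(!umulo_i8_via_i32(15, 15), "225 fits in i8");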
SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
EVT VT = N->getValueType(0);
SDLoc dl(N);
MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
// The argument is passed as NumRegs registers of type RegVT.
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
N->getConstantOperandVal(3));
Chain = Parts[i].getValue(1);
}
// Handle endianness of the load.
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts.begin(), Parts.end());
// Assemble the parts in the promoted type.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
for (unsigned i = 1; i < NumRegs; ++i) {
SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
// Shift it to the right position and "or" it in.
Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
DAG.getConstant(i * RegVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
}
// Modified the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
return Res;
}
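// Worked example of the assembly loop above with concrete widths: two i32
// pieces of an i64 are zero-extended and OR-ed in at their register offsets,
// low piece first (the big-endian case just reverses the pieces beforehand).
// A minimal standalone sketch; the helper name is illustrative only.
#include <cstdint>

constexpr uint64_t joinParts(uint32_t Lo, uint32_t Hi) {
  uint64_t Res = Lo;                      // Parts[0], zero-extended
  Res |= static_cast<uint64_t>(Hi) << 32; // Parts[1] shifted into position
  return Res;
}

static_assert(joinParts(0x89ABCDEFu, 0x01234567u) == 0x0123456789ABCDEFull, "");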
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
/// PromoteIntegerOperand - This method is called when the specified operand of
/// the specified node is found to need promotion. At this point, all of the
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
}
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
case ISD::ATOMIC_STORE:
Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
break;
case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR:
Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
OpNo); break;
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
OpNo); break;
case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
const bool IsStrictFp = N->isStrictFPOpcode();
assert(Res.getValueType() == N->getValueType(0) &&
N->getNumValues() == (IsStrictFp ? 2 : 1) &&
"Invalid operand expansion");
LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: ";
Res.dump());
ReplaceValueWith(SDValue(N, 0), Res);
if (IsStrictFp)
ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
return false;
}
/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
/// shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
ISD::CondCode CCCode) {
// We have to insert explicit sign or zero extends. Note that we could
// insert sign extends for ALL conditions. For those operations where either
// zero or sign extension would be valid, use SExtOrZExtPromotedInteger
// which will choose the cheapest for the target.
switch (CCCode) {
default: llvm_unreachable("Unknown integer comparison!");
case ISD::SETEQ:
case ISD::SETNE: {
SDValue OpL = GetPromotedInteger(NewLHS);
SDValue OpR = GetPromotedInteger(NewRHS);
// We would prefer to promote the comparison operand with sign extension.
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
// instruction, which would eventually be redundant.
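// For illustration: if NewLHS is i8 and OpL is its i32 promotion, then
// OpLEffectiveBits = 32 - ComputeNumSignBits(OpL) + 1. When OpL has at least
// 25 known sign bits this is <= 8, i.e. OpL already equals the sign-extension
// of the original i8 value, so the promoted values can be compared as-is for
// equality.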
unsigned OpLEffectiveBits =
OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
NewLHS = SExtOrZExtPromotedInteger(NewLHS);
NewRHS = SExtOrZExtPromotedInteger(NewRHS);
}
break;
}
case ISD::SETUGE:
case ISD::SETUGT:
case ISD::SETULE:
case ISD::SETULT:
NewLHS = SExtOrZExtPromotedInteger(NewLHS);
NewRHS = SExtOrZExtPromotedInteger(NewRHS);
break;
case ISD::SETGE:
case ISD::SETGT:
case ISD::SETLT:
case ISD::SETLE:
NewLHS = SExtPromotedInteger(NewLHS);
NewRHS = SExtPromotedInteger(NewRHS);
break;
}
}
SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(2);
SDValue RHS = N->getOperand(3);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
// The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
// legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
N->getOperand(1), LHS, RHS, N->getOperand(4)),
0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
// Since the result type is legal, the operands must promote to it.
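// For illustration: a BUILD_PAIR of two i32 halves whose legal result type is
// i64 becomes zext(LoHalf) | (HiHalf << 32), built from the promoted operands
// below.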
EVT OVT = N->getOperand(0).getValueType();
SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
SDValue Hi = GetPromotedInteger(N->getOperand(1));
assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
SDLoc dl(N);
Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
DAG.getConstant(OVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// The vector type is legal but the element type is not. This implies
// that the vector is a power-of-two in length and that the element
// type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements();
assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
"Legal vector of one illegal element?");
// Promote the element values. Their type does not need to match the
// vector element type. Check that any extra bits introduced will be
// truncated away.
assert(N->getOperand(0).getValueSizeInBits() >=
N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
SmallVector<SDValue, 16> NewOps;
for (unsigned i = 0; i < NumElts; ++i)
NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
unsigned OpNo) {
if (OpNo == 1) {
// Promote the inserted value. This is valid because the type does not
// have to match the vector element type.
// Check that any extra bits introduced will be truncated away.
assert(N->getOperand(1).getValueSizeInBits() >=
N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
N->getOperand(2)),
0);
}
assert(OpNo == 2 && "Different operand and result vector types?");
// Promote the index.
SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
TLI.getVectorIdxTy(DAG.getDataLayout()));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
N->getOperand(1), Idx), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
return SDValue(DAG.UpdateNodeOperands(N,
GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
// Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
// operand in place.
return SDValue(
DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
EVT OpTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::VSELECT)
if (SDValue Res = WidenVSELECTMask(N))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
Res, N->getOperand(1), N->getOperand(2));
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
Cond = PromoteTargetBoolean(Cond, OpVT);
return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
// The CC (#4) and the possible return values (#2 and #3) have legal types.
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
N->getOperand(3), N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
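// The promoted operand may hold arbitrary bits above the original width, so
// any-extend it to the result type and then re-sign-extend in-register from
// the original (pre-promotion) value type.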
SDValue Op = GetPromotedInteger(N->getOperand(0));
SDLoc dl(N);
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
Op, DAG.getValueType(N->getOperand(0).getValueType()));
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
SExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
SDLoc dl(N);
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
return DAG.getTruncStore(Ch, dl, Val, Ptr,
N->getMemoryVT(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
SDLoc dl(N);
bool TruncateStore = false;
if (OpNo == 4) {
Mask = PromoteTargetBoolean(Mask, DataVT);
// Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
assert(OpNo == 1 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
TruncateStore = true;
}
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
TruncateStore, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
unsigned OpNo) {
assert(OpNo == 3 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[OpNo] = Mask;
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
if (Res == N)
return SDValue(Res, 0);
// Update triggered CSE, do our own replacement since caller can't.
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
unsigned OpNo) {
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
// The Index
if (N->isIndexSigned())
// Need to sign extend the index since the bits will likely be used.
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
if (Res == N)
return SDValue(Res, 0);
// Update triggered CSE, do our own replacement since caller can't.
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
// The Index
if (N->isIndexSigned())
// Need to sign extend the index since the bits will likely be used.
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
N->getMemoryVT(), NewOps[OpNo]));
} else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
TruncateStore = true;
}
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
SDLoc(N), NewOps, N->getMemOperand(),
N->getIndexType(), TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDLoc DL(N);
Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
// Promote the RETURNADDR/FRAMEADDR argument to a supported integer width.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
assert(OpNo > 1 && "Don't know how to promote this operand!");
// Promote the rw, locality, and cache type arguments to a supported integer
// width.
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
Op2, Op3, Op4),
0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
// FIXME: Support for promotion of STRICT_FPOWI is not implemented yet.
assert(N->getOpcode() == ISD::FPOWI && "No STRICT_FPOWI support here yet.");
// The integer operand is the last operand in FPOWI (so the result and
// floating point operand are already type legalized).
// We can't just promote the exponent type in FPOWI, since we want to lower
// the node to a libcall, and if we promote to a type larger than
// sizeof(int) the libcall might not match the target's ABI. Instead
// we rewrite to a libcall here directly, letting makeLibCall handle promotion
// if the target accepts it according to shouldSignExtendTypeInLibCall.
RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
// FIXME: Implement this if some target needs it.
DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
return DAG.getUNDEF(N->getValueType(0));
}
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
N->getOperand(1).getValueType().getSizeInBits() &&
"POWI exponent should match with sizeof(int) when doing the libcall.");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops,
CallOptions, SDLoc(N), SDValue());
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op;
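// The choice of extension matters: SMAX/SMIN need the promoted elements to be
// proper sign extensions and UMAX/UMIN proper zero extensions, otherwise the
// wider compares could pick the wrong element. ADD/MUL and the bitwise
// reductions use a plain promotion, since their low result bits do not depend
// on the promoted high bits.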
switch (N->getOpcode()) {
default: llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
Op = GetPromotedInteger(N->getOperand(0));
break;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
Op = SExtPromotedInteger(N->getOperand(0));
break;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Op = ZExtPromotedInteger(N->getOperand(0));
break;
}
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
// Result size must be >= element size. If this is not the case after
// promotion, also promote the result type and then truncate.
SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
}
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
/// ExpandIntegerResult - This method is called when the specified result of the
/// specified node is found to need expansion. At this point, the node may also
/// have invalid operands or may have other results that need promotion; we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_CMP_SWAP: {
std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
SplitInteger(Tmp.first, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Tmp.second);
break;
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
AtomicSDNode *AN = cast<AtomicSDNode>(N);
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
SDValue Tmp = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
AN->getMemOperand());
// Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
// success simply by comparing the loaded value against the incoming
// comparison value.
SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
N->getOperand(2), ISD::SETEQ);
SplitInteger(Tmp, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Success);
ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
break;
}
case ISD::AND:
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
case ISD::UMAX:
case ISD::SMAX:
case ISD::UMIN:
case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
case ISD::ADD:
case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
case ISD::ADDC:
case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
case ISD::ADDE:
case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
case ISD::SADDO:
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
case ISD::UMULO:
case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
case ISD::SSHLSAT:
case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::SDIVFIX:
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
case ISD::ROTL:
case ISD::ROTR:
ExpandIntRes_Rotate(N, Lo, Hi);
break;
case ISD::FSHL:
case ISD::FSHR:
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;
case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
if (Lo.getNode())
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
/// Lower an atomic node to the appropriate builtin call.
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
AtomicOrdering order = cast<AtomicSDNode>(Node)->getMergedOrdering();
// Lower to an outline-atomic libcall if outline atomics are enabled,
// or to a sync libcall otherwise.
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
EVT RetVT = Node->getValueType(0);
TargetLowering::MakeLibCallOptions CallOptions;
SmallVector<SDValue, 4> Ops;
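// Note the operand order: the outline-atomic libcalls take the value
// operand(s) first and the pointer last, while the __sync_* libcalls take the
// pointer first, so the node operands (chain, ptr, val...) are reordered
// accordingly below.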
if (TLI.getLibcallName(LC)) {
Ops.append(Node->op_begin() + 2, Node->op_end());
Ops.push_back(Node->getOperand(1));
} else {
LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected atomic op or value type!");
Ops.append(Node->op_begin() + 1, Node->op_end());
}
return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
Node->getOperand(0));
}
/// N is a shift whose shifted operand needs to be expanded and whose shift
/// amount is the constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
// split a vector shift, like this: <op1, op2> SHL <0, 2>.
if (!Amt) {
Lo = InL;
Hi = InH;
return;
}
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
EVT ShTy = N->getOperand(1).getValueType();
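// For illustration, expanding an i64 SHL with a 32-bit NVT
// (VTBits = 64, NVTBits = 32):
//   Amt = 40: Lo = 0,        Hi = InL << 8
//   Amt = 32: Lo = 0,        Hi = InL
//   Amt = 5:  Lo = InL << 5, Hi = (InH << 5) | (InL >> 27)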
if (N->getOpcode() == ISD::SHL) {
if (Amt.ugt(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = DAG.getNode(ISD::SHL, DL,
NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = InL;
} else {
Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
Hi = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
}
return;
}
if (N->getOpcode() == ISD::SRL) {
if (Amt.ugt(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, DL, NVT);
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt.ugt(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt - NVTBits, DL, ShTy));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else {
Lo = DAG.getNode(ISD::OR, DL, NVT,
DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, DL, ShTy)),
DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
}
}
/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
/// this shift based on knowledge of the high bits of the shift amount. If we
/// can tell this, we know that the amount is >= NVTBits or < NVTBits, without
/// knowing its exact value.
bool DAGTypeLegalizer::
ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
unsigned ShBits = ShTy.getScalarSizeInBits();
unsigned NVTBits = NVT.getScalarSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
KnownBits Known = DAG.computeKnownBits(N->getOperand(1));
// If we don't know anything about the high bits, exit.
if (((Known.Zero|Known.One) & HighBitMask) == 0)
return false;
// Get the incoming operand to be shifted.
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// If we know that any of the high bits of the shift amount are one, then we
// can do this as a couple of simple shifts.
if (Known.One.intersects(HighBitMask)) {
// Mask out the high bit, which we know is set.
Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
DAG.getConstant(~HighBitMask, dl, ShTy));
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
return true;
case ISD::SRL:
Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
case ISD::SRA:
Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
DAG.getConstant(NVTBits - 1, dl, ShTy));
Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
return true;
}
}
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if (HighBitMask.isSubsetOf(Known.Zero)) {
// Calculate NVTBits-1 - x (e.g. 31-x for a 32-bit NVT). NVTBits-1 is used
// instead of NVTBits to avoid creating an undefined shift if x is zero. We
// can use XOR here because x is known to be smaller than NVTBits.
SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
DAG.getConstant(NVTBits - 1, dl, ShTy));
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
case ISD::SRL:
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
// When shifting right the arithmetic for Lo and Hi is swapped.
if (N->getOpcode() != ISD::SHL)
std::swap(InL, InH);
// Use a little trick to get the bits that move from Lo to Hi. First
// shift by one bit.
SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
// Then compute the remaining shift with amount-1.
SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
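// E.g. for SHL with NVTBits == 32 and Amt known to be < 32, Amt2 == 31 - Amt,
// so Sh2 == InL >> (32 - Amt), computed as (InL >> 1) >> (31 - Amt) so that no
// individual shift amount ever reaches the full 32 bits (which would be
// undefined).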
Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
if (N->getOpcode() != ISD::SHL)
std::swap(Hi, Lo);
return true;
}
return false;
}
/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
/// of any size.
bool DAGTypeLegalizer::
ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
unsigned NVTBits = NVT.getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
// Get the incoming operand to be shifted.
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, NVBitsNode, ISD::SETULT);
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, DAG.getConstant(0, dl, ShTy),
ISD::SETEQ);
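// Compute both the "short" result (Amt < NVTBits) and the "long" result
// (Amt >= NVTBits) and select between them. The extra isZero select keeps the
// Amt == 0 case away from the NVTBits - Amt shift, which would otherwise shift
// by the full narrow register width.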
SDValue LoS, HiS, LoL, HiL;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
// Short: ShAmt < NVTBits
LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
HiS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
// Long: ShAmt >= NVTBits
LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
Hi = DAG.getSelect(dl, NVT, isZero, InH,
DAG.getSelect(dl, NVT, isShort, HiS, HiL));
return true;
case ISD::SRL:
// Short: ShAmt < NVTBits
HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
// FIXME: If Amt is zero, the following shift generates an undefined result
// on some architectures.
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRA:
// Short: ShAmt < NVTBits
HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
// Long: ShAmt >= NVTBits
HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
DAG.getConstant(NVTBits - 1, dl, ShTy));
LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
}
}
static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
switch (Op) {
default: llvm_unreachable("invalid min/max opcode");
case ISD::SMAX:
return std::make_pair(ISD::SETGT, ISD::UMAX);
case ISD::UMAX:
return std::make_pair(ISD::SETUGT, ISD::UMAX);
case ISD::SMIN:
return std::make_pair(ISD::SETLT, ISD::UMIN);
case ISD::UMIN:
return std::make_pair(ISD::SETULT, ISD::UMIN);
}
}
void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
ISD::NodeType LoOpc;
ISD::CondCode CondC;
std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
// Value types
EVT NVT = LHSL.getValueType();
EVT CCT = getSetCCResultType(NVT);
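// For illustration, smax(A, B) on a value split into (Hi, Lo):
//   Hi(result) = smax(AHi, BHi)
//   Lo(result) = (AHi == BHi) ? umax(ALo, BLo)
//                             : (AHi > BHi ? ALo : BLo)   // signed compare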
// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
// We need to know whether to select the Lo part that corresponds to the
// 'winning' Hi part, or whether the Hi parts are equal (in which case the
// Lo parts decide).
SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
// Lo part corresponding to the 'winning' Hi part
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
// Lo part to use when the Hi parts are equal; this uses the unsigned version.
SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
EVT NVT = LHSL.getValueType();
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
bool HasOpCarry = TLI.isOperationLegalOrCustom(
N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasOpCarry) {
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
}
return;
}
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
// a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::ADDC : ISD::SUBC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
return;
}
bool hasOVF =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
if (hasOVF) {
EVT OvfVT = getSetCCResultType(NVT);
SDVTList VTList = DAG.getVTList(NVT, OvfVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
} else {
RevOpc = ISD::ADD;
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
}
SDValue OVF = Lo.getValue(1);
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
LLVM_FALLTHROUGH;
case TargetLoweringBase::ZeroOrOneBooleanContent:
OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
break;
case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
}
return;
}
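// Neither ADDCARRY/SUBCARRY, ADDC/SUBC, nor UADDO/USUBO is available: compute
// the low half with a plain ADD/SUB and derive the carry/borrow from an
// unsigned compare (with wraparound, a + b < a iff the add carried, and
// a < b iff the subtract borrowed).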
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
return;
}
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
ISD::SETULT);
SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
DAG.getConstant(1, dl, NVT), Carry1);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
SDValue Borrow;
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
else
Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
if (N->getOpcode() == ISD::ADDC) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDLoc dl(N);
SDValue Ovf;
unsigned CarryOp, NoCarryOp;
ISD::CondCode Cond;
switch(N->getOpcode()) {
case ISD::UADDO:
CarryOp = ISD::ADDCARRY;
NoCarryOp = ISD::ADD;
Cond = ISD::SETULT;
break;
case ISD::USUBO:
CarryOp = ISD::SUBCARRY;
NoCarryOp = ISD::SUB;
Cond = ISD::SETUGT;
break;
default:
llvm_unreachable("Node has unexpected Opcode");
}
bool HasCarryOp = TLI.isOperationLegalOrCustom(
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
GetExpandedInteger(RHS, RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(CarryOp, dl, VTList, HiOps);
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Calculate the overflow: addition overflows iff a + b < a, and subtraction
// overflows iff a - b > a.
Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
}
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ovf);
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
// We need to use an unsigned carry op for the lo part.
unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
: ISD::SUBCARRY;
Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is any extension of the input (which degenerates to a copy).
Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
Hi = DAG.getUNDEF(NVT); // The high part is undefined.
} else {
// For example, extension of an i48 to an i64: the operand type necessarily
// promotes to the result type, so it will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
}
}
void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part replicates the sign bit of Lo, make it explicit.
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NVTBits - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
}
}
void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned NVTBits = NVT.getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
if (NVTBits < EVTBits) {
Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
EVTBits - NVTBits)));
} else {
Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
// The high part must be zero, make it explicit.
Hi = DAG.getConstant(0, dl, NVT);
}
}
void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
}
void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
}
void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
// parity(HiLo) -> parity(Lo^Hi)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
Lo =
DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
auto Constant = cast<ConstantSDNode>(N);
const APInt &Cst = Constant->getAPIntValue();
bool IsTarget = Constant->isTargetOpcode();
bool IsOpaque = Constant->isOpaque();
SDLoc dl(N);
Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
IsOpaque);
}
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
SDValue N0 = N->getOperand(0);
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
// If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
// use in LegalizeDAG. The ADD part of the expansion is based on
// ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
// ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
// if needed. Shift expansion has a special case for filling with sign bits
// so that we will only end up with one SRA.
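// With Sign = Hi >> (NVTBits - 1) (all zeros or all ones), this is the usual
// abs(X) == (X + Sign) ^ Sign identity applied to the double-wide value: the
// UADDO/ADDCARRY pair performs the wide add and the XORs finish the expansion.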
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
return;
}
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
DAG.getConstant(NVT.getSizeInBits(), dl,
NVT)));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
// cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT NVT = Lo.getValueType();
SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
DAG.getConstant(0, dl, NVT), ISD::SETNE);
SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
DAG.getConstant(NVT.getSizeInBits(), dl,
NVT)));
Hi = DAG.getConstant(0, dl, NVT);
}
void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Chain);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (IsStrict)
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (IsStrict)
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
"Input type needs to be promoted!");
EVT VT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (N->getOpcode() == ISD::LLROUND ||
N->getOpcode() == ISD::STRICT_LLROUND) {
if (VT == MVT::f32)
LC = RTLIB::LLROUND_F32;
else if (VT == MVT::f64)
LC = RTLIB::LLROUND_F64;
else if (VT == MVT::f80)
LC = RTLIB::LLROUND_F80;
else if (VT == MVT::f128)
LC = RTLIB::LLROUND_F128;
else if (VT == MVT::ppcf128)
LC = RTLIB::LLROUND_PPCF128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
} else if (N->getOpcode() == ISD::LLRINT ||
N->getOpcode() == ISD::STRICT_LLRINT) {
if (VT == MVT::f32)
LC = RTLIB::LLRINT_F32;
else if (VT == MVT::f64)
LC = RTLIB::LLRINT_F64;
else if (VT == MVT::f80)
LC = RTLIB::LLRINT_F80;
else if (VT == MVT::f128)
LC = RTLIB::LLRINT_F128;
else if (VT == MVT::ppcf128)
LC = RTLIB::LLRINT_PPCF128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
} else
llvm_unreachable("Unexpected opcode!");
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
Op, CallOptions, dl,
Chain);
SplitInteger(Tmp.first, Lo, Hi);
if (N->isStrictFPOpcode())
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
if (N->isAtomic()) {
// It's typical for a target to support a wider compare-and-swap than its
// atomic load instructions, so expand the load via ATOMIC_CMP_SWAP_WITH_SUCCESS.
SDLoc dl(N);
EVT VT = N->getMemoryVT();
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
VT, VTs, N->getOperand(0),
N->getOperand(1), Zero, Zero, N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
return;
}
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
}
assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
assert(NVT.isByteSized() && "Expanded type not byte sized!");
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
if (ExtType == ISD::SEXTLOAD) {
// The high part is obtained by SRA'ing all but one of the bits of the
// lo part.
unsigned LoSize = Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else if (ExtType == ISD::ZEXTLOAD) {
// The high part is just a zero.
Hi = DAG.getConstant(0, dl, NVT);
} else {
assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
// The high part is undefined.
Hi = DAG.getUNDEF(NVT);
}
} else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
} else {
// Big-endian - high bits are at low addresses. Favor aligned loads at
// the cost of some bit-fiddling.
EVT MemVT = N->getMemoryVT();
unsigned EBytes = MemVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
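// As an illustration, for MemVT = i48 with NVT = i32 this gives EBytes = 6,
// IncrementSize = 4 and ExcessBits = 16: Hi below loads the top 32 bits, Lo
// zero-extends the remaining 16 bits, and the fix-up at the end ORs
// (Hi << 16) into Lo and shifts Hi right by 16.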
// Load both the high bits and maybe some of the low bits.
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer low bits from the bottom of Hi to the top of Lo.
Lo = DAG.getNode(
ISD::OR, dl, NVT, Lo,
DAG.getNode(ISD::SHL, dl, NVT, Hi,
DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout()))));
// Move high bits to the right position in Hi.
Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout())));
}
}
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
}
void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
SDValue LL, LH, RL, RH;
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
}
void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
SDValue LL, LH, RL, RH;
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH))
return;
// If nothing else, we can make a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::MUL_I16;
else if (VT == MVT::i32)
LC = RTLIB::MUL_I32;
else if (VT == MVT::i64)
LC = RTLIB::MUL_I64;
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
// We'll expand the multiplication by brute force because we have no other
// options. This is a trivially-generalized version of the code from
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
// 4.3.1).
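// Sketch of the decomposition used below: with H = HalfBits, write each
// expanded half as A = A1*2^H + A0 and B = B1*2^H + B0. Then
// LL*RL = A0*B0 + (A1*B0 + A0*B1)*2^H + A1*B1*2^(2H); the code accumulates
// this column by column (T, U, V, W) so that no intermediate product needs
// more than NVT bits, and finally adds the cross terms RH*LL and RL*LH,
// which only affect the upper half of the full product.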
unsigned Bits = NVT.getSizeInBits();
unsigned HalfBits = Bits >> 1;
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
NVT);
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
// The type from TLI is too small to fit the shift amount we want.
// Override it with i32. The shift will have to be legalized.
ShiftAmtTy = MVT::i32;
}
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
DAG.getNode(ISD::ADD, dl, NVT,
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
return;
}
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc DL(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
Lo = R.getValue(0);
Hi = R.getValue(1);
ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Result = TLI.expandAddSubSat(N, DAG);
SplitInteger(Result, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Result = TLI.expandShlSat(N, DAG);
SplitInteger(Result, Lo, Hi);
}
/// This performs an expansion of the integer result for a fixed point
/// multiplication. The default expansion performs rounding down towards
/// negative infinity, though targets that do care about rounding should specify
/// a target hook for rounding and provide their own expansion or lowering of
/// fixed point multiplication to be consistent with rounding.
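/// For example, with Scale = 16 the operation computes (LHS * RHS) >> 16 on
/// the double-width product: in Q16.16, 1.5 * 2.25 (0x18000 * 0x24000)
/// yields 0x36000, i.e. 3.375.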
void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
unsigned VTSize = VT.getScalarSizeInBits();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
N->getOpcode() == ISD::UMULFIXSAT);
bool Signed = (N->getOpcode() == ISD::SMULFIX ||
N->getOpcode() == ISD::SMULFIXSAT);
// Handle special case when scale is equal to zero.
if (!Scale) {
SDValue Result;
if (!Saturating) {
Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else {
EVT BoolVT = getSetCCResultType(VT);
unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
if (Signed) {
APInt MinVal = APInt::getSignedMinValue(VTSize);
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs; if the resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
} else {
// For unsigned multiplication, we only need to check the max since we
// can't really overflow towards zero.
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
SplitInteger(Result, Lo, Hi);
return;
}
// For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will
// cover for unhandled cases below, while still being valid for UMULFIX[SAT].
assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue LL, LH, RL, RH;
GetExpandedInteger(LHS, LL, LH);
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
return;
}
unsigned NVTSize = NVT.getScalarSizeInBits();
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
//
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
// 128 bits that are cut into 4 32-bit parts:
//
// HH HL LH LL
// |---32---|---32---|---32---|---32---|
// 128 96 64 32 0
//
// |------VTSize-----|
//
// |NVTSize-|
//
// The resulting Lo and Hi would normally be in LL and LH after the shift. But
// to avoid unnecessary shifting of all 4 parts, we can adjust the shift
// amount and get Lo and Hi using two funnel shifts. Or for the special case
// when Scale is a multiple of NVTSize we can just pick the result without
// shifting.
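// For example, with VTSize = 64, NVTSize = 32 and Scale = 20: Part0 = 0 and
// Scale % NVTSize = 20, so Lo = fshr(Result[1], Result[0], 20) and
// Hi = fshr(Result[2], Result[1], 20). With Scale = 32 instead, Part0 = 1 and
// the parts are picked directly: Lo = Result[1], Hi = Result[2].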
uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
if (Scale % NVTSize) {
SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
ShiftAmount);
Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
ShiftAmount);
} else {
Lo = Result[Part0];
Hi = Result[Part0 + 1];
}
// Unless saturation is requested we are done. The result is in <Hi,Lo>.
if (!Saturating)
return;
// Cannot overflow when there is no integer part.
if (Scale == VTSize)
return;
// To handle saturation we must check for overflow in the multiplication.
//
// Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
// aren't all zeroes.
//
// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
// aren't all ones or all zeroes.
//
// We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
// highest bit of HH determines saturation direction in the event of signed
// saturation.
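// For example, with VTSize = 64, NVTSize = 32 and Scale = 16 (unsigned): the
// value fits iff the top VTSize - Scale = 48 bits of the 128-bit product are
// zero, which is exactly the (HH | (HL >> 16)) == 0 test taken below.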
SDValue ResultHL = Result[2];
SDValue ResultHH = Result[3];
SDValue SatMax, SatMin;
SDValue NVTZero = DAG.getConstant(0, dl, NVT);
SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
EVT BoolNVT = getSetCCResultType(NVT);
if (!Signed) {
if (Scale < NVTSize) {
// Overflow happened if ((HH | (HL >> Scale)) != 0).
SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
DAG.getConstant(Scale, dl, ShiftTy));
SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
} else if (Scale == NVTSize) {
// Overflow happened if (HH != 0).
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
} else if (Scale < VTSize) {
// Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
DAG.getConstant(Scale - NVTSize, dl,
ShiftTy));
SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
} else
llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
"(and saturation can't happen with Scale==VTSize).");
Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
return;
}
if (Scale < NVTSize) {
// The number of overflow bits we can check are VTSize - Scale + 1 (we
// include the sign bit). If these top bits are > 0, then we overflowed past
// the max value. If these top bits are < -1, then we overflowed past the
// min value. Otherwise, we did not overflow.
unsigned OverflowBits = VTSize - Scale + 1;
assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
"Extent of overflow bits must start within HL");
SDValue HLHiMask = DAG.getConstant(
APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
SDValue HLLoMask = DAG.getConstant(
APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
// We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
// We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
} else if (Scale == NVTSize) {
// We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
// We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
} else if (Scale < VTSize) {
// This is similar to the case when we saturate if Scale < NVTSize, but we
// only need to check HH.
unsigned OverflowBits = VTSize - Scale + 1;
SDValue HHHiMask = DAG.getConstant(
APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
SDValue HHLoMask = DAG.getConstant(
APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
} else
llvm_unreachable("Illegal scale for signed fixed point mul.");
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
APInt MaxLo = APInt::getAllOnesValue(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
APInt MinHi = APInt::getSignedMinValue(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
// Try expanding in the existing type first.
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
N->getOperand(1),
N->getConstantOperandVal(2), DAG);
if (!Res)
Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
N->getConstantOperandVal(2), TLI, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
assert((Node->getOpcode() == ISD::SADDO || Node->getOpcode() == ISD::SSUBO) &&
"Node has unexpected Opcode");
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
SDValue Ovf;
bool IsAdd = Node->getOpcode() == ISD::SADDO;
unsigned CarryOp = IsAdd ? ISD::SADDO_CARRY : ISD::SSUBO_CARRY;
bool HasCarryOp = TLI.isOperationLegalOrCustom(
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
GetExpandedInteger(RHS, RHSL, RHSH);
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
Lo = DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, dl, VTList, {LHSL, RHSL});
Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Compute the overflow.
//
// LHSSign -> LHS < 0
// RHSSign -> RHS < 0
// SumSign -> Sum < 0
//
// Add:
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
//
// To get better codegen we can rewrite this by doing bitwise math on
// the integers and extract the final sign bit at the end. So the
// above becomes:
//
// Add:
// Overflow -> (~(LHS ^ RHS) & (LHS ^ Sum)) < 0
// Sub:
// Overflow -> ((LHS ^ RHS) & (LHS ^ Sum)) < 0
//
// NOTE: This is different than the expansion we do in expandSADDSUBO
// because it is more costly to determine the RHS is > 0 for SSUBO with the
// integers split.
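// Worked example for the add case on i8: 100 + 100 wraps to -56. Here
// LHS ^ RHS has a clear sign bit, so ~(LHS ^ RHS) has it set, and LHS ^ Sum
// also has it set, so the AND is negative and overflow is reported.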
EVT VT = LHS.getValueType();
SDValue SignsMatch = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
if (IsAdd)
SignsMatch = DAG.getNOT(dl, SignsMatch, VT);
SDValue SumSignNE = DAG.getNode(ISD::XOR, dl, VT, LHS, Sum);
Ovf = DAG.getNode(ISD::AND, dl, VT, SignsMatch, SumSignNE);
EVT OType = Node->getValueType(1);
Ovf = DAG.getSetCC(dl, OType, Ovf, DAG.getConstant(0, dl, VT), ISD::SETLT);
}
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(0), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::SDIV_I16;
else if (VT == MVT::i32)
LC = RTLIB::SDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
// If we can emit an efficient shift operation, do so now. Check to see if
// the RHS is a constant.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
// If we can determine that the high bit of the shift is zero or one, even if
// the low bits are variable, emit this shift in an optimized form.
if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
return;
// If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
unsigned PartsOpc;
if (N->getOpcode() == ISD::SHL) {
PartsOpc = ISD::SHL_PARTS;
} else if (N->getOpcode() == ISD::SRL) {
PartsOpc = ISD::SRL_PARTS;
} else {
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
PartsOpc = ISD::SRA_PARTS;
}
// Next check to see if the target supports this SHL_PARTS operation or if it
// will custom expand it. Don't lower this to SHL_PARTS when we optimise for
// size, but create a libcall instead.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
const bool LegalOrCustom =
(Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
Action == TargetLowering::Custom;
if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
EVT VT = LHSL.getValueType();
// If the shift amount operand is coming from a vector legalization it may
// have an illegal type. Fix that first by casting the operand, otherwise
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
assert(ShiftTy.getScalarSizeInBits() >=
Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
SDValue Ops[] = { LHSL, LHSH, ShiftOp };
Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
Hi = Lo.getValue(1);
return;
}
// Otherwise, emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
bool isSigned;
if (N->getOpcode() == ISD::SHL) {
isSigned = false; /*sign irrelevant*/
if (VT == MVT::i16)
LC = RTLIB::SHL_I16;
else if (VT == MVT::i32)
LC = RTLIB::SHL_I32;
else if (VT == MVT::i64)
LC = RTLIB::SHL_I64;
else if (VT == MVT::i128)
LC = RTLIB::SHL_I128;
} else if (N->getOpcode() == ISD::SRL) {
isSigned = false;
if (VT == MVT::i16)
LC = RTLIB::SRL_I16;
else if (VT == MVT::i32)
LC = RTLIB::SRL_I32;
else if (VT == MVT::i64)
LC = RTLIB::SRL_I64;
else if (VT == MVT::i128)
LC = RTLIB::SRL_I128;
} else {
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
isSigned = true;
if (VT == MVT::i16)
LC = RTLIB::SRA_I16;
else if (VT == MVT::i32)
LC = RTLIB::SRA_I32;
else if (VT == MVT::i64)
LC = RTLIB::SRA_I64;
else if (VT == MVT::i128)
LC = RTLIB::SRA_I128;
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
return;
}
if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
llvm_unreachable("Unsupported shift!");
}
void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is sign extension of the input (degenerates to a copy).
Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
// The high part is obtained by SRA'ing all but one of the bits of low part.
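// E.g. sign-extending i16 to an expanded i64 with NVT = i32: Lo is the i32
// sign extension and Hi = Lo >> 31 (arithmetic), i.e. 0 or -1.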
unsigned LoSize = NVT.getSizeInBits();
Hi = DAG.getNode(
ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
}
}
void DAGTypeLegalizer::
ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
GetExpandedInteger(N->getOperand(0), Lo, Hi);
EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (EVT.bitsLE(Lo.getValueType())) {
// sext_inreg the low part if needed.
Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
N->getOperand(1));
// The high part gets the sign extension from the lo-part. This handles
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
// sext_inreg the high part.
unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
}
}
void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(1), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::SREM_I16;
else if (VT == MVT::i32)
LC = RTLIB::SREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
N->getOperand(0),
DAG.getConstant(NVT.getSizeInBits(), dl,
TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
if (N->getOpcode() == ISD::UMULO) {
// This section expands the operation into the following sequence of
// instructions. `iNh` here refers to a type which has half the bit width of
// the type the original operation operated on.
//
// %0 = %LHS.HI != 0 && %RHS.HI != 0
// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
// %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
// %4 = add iNh %1.0, %2.0 as iN
// %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH)
//
// %lo = %3.LO
// %hi = %5.0
// %ovf = %0 || %1.1 || %2.1 || %5.1
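// Intuition: writing LHS = LH*2^h + LL and RHS = RH*2^h + RL, the full
// product is LH*RH*2^(2h) + (LH*RL + LL*RH)*2^h + LL*RL. Overflow occurs
// exactly when LH*RH is nonzero (%0), when a cross product does not fit in
// h bits (%1.1, %2.1), or when the final add carries out of the high half
// (%5.1); when %0 is false at most one cross product is nonzero, so the
// plain add for %4 cannot itself wrap.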
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
GetExpandedInteger(LHS, LHSLow, LHSHigh);
GetExpandedInteger(RHS, RHSLow, RHSHigh);
EVT HalfVT = LHSLow.getValueType();
EVT BitVT = N->getValueType(1);
SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT);
SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two);
// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn't this
// operation recursively legalized?).
//
// Many backends understand this pattern and will convert into LOHI
// themselves, if applicable.
SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
SplitInteger(Three, Lo, Hi);
Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum);
Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
// Replace this with a libcall that will check overflow.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
LC = RTLIB::MULO_I32;
else if (VT == MVT::i64)
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain =
DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
MachinePointerInfo());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (const SDValue &Op : N->op_values()) {
EVT ArgVT = Op.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
}
// Also pass the address of the overflow check.
Entry.Node = Temp;
Entry.Ty = PtrTy->getPointerTo();
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 =
DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, dl, PtrVT),
ISD::SETNE);
// Use the overflow from the libcall everywhere.
ReplaceValueWith(SDValue(N, 1), Ofl);
}
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(0), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UDIV_I16;
else if (VT == MVT::i32)
LC = RTLIB::UDIV_I32;
else if (VT == MVT::i64)
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
SplitInteger(Res.getValue(1), Lo, Hi);
return;
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i16)
LC = RTLIB::UREM_I16;
else if (VT == MVT::i32)
LC = RTLIB::UREM_I32;
else if (VT == MVT::i64)
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
SDValue Op = N->getOperand(0);
if (Op.getValueType().bitsLE(NVT)) {
// The low part is zero extension of the input (degenerates to a copy).
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
} else {
// For example, extension of an i48 to an i64. The operand type necessarily
// promotes to the result type, so will end up being expanded too.
assert(getTypeAction(Op.getValueType()) ==
TargetLowering::TypePromoteInteger &&
"Only know how to promote this result!");
SDValue Res = GetPromotedInteger(Op);
assert(Res.getValueType() == N->getValueType(0) &&
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
EVT::getIntegerVT(*DAG.getContext(),
ExcessBits));
}
}
void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
// both halves independently.
SDValue Res = TLI.expandVecReduce(N, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Lower the rotate to shifts and ORs which can be expanded.
SDValue Res;
TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
SDValue &Lo, SDValue &Hi) {
// Lower the funnel shift to shifts and ORs which can be expanded.
SDValue Res;
TLI.expandFunnelShift(N, Res, DAG);
SplitInteger(Res, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
EVT HalfVT =
EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);
SDLoc dl(N);
// We assume VSCALE(1) fits into a legal integer.
APInt One(HalfVT.getSizeInBits(), 1);
SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
SplitInteger(Res, Lo, Hi);
}
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
/// ExpandIntegerOperand - This method is called when the specified operand of
/// the specified node is found to need expansion. At this point, all of the
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
case ISD::RETURNADDR:
case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
if (!Res.getNode()) return false;
// If the result is N, the sub-method updated N in place. Tell the legalizer
// core about this.
if (Res.getNode() == N)
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedInteger(NewLHS, LHSLo, LHSHi);
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
if (RHSCST->isAllOnesValue()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
NewRHS = RHSLo;
return;
}
}
}
NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
return;
}
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
(CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
}
// FIXME: This generated code sucks.
ISD::CondCode LowCC;
switch (CCCode) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETLT:
case ISD::SETULT: LowCC = ISD::SETULT; break;
case ISD::SETGT:
case ISD::SETUGT: LowCC = ISD::SETUGT; break;
case ISD::SETLE:
case ISD::SETULE: LowCC = ISD::SETULE; break;
case ISD::SETGE:
case ISD::SETUGE: LowCC = ISD::SETUGE; break;
}
// LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
// HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
// dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
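// For instance, a signed 64-bit "a < b" split into 32-bit halves becomes
// hi(a) == hi(b) ? (lo(a) <u lo(b)) : (hi(a) <s hi(b)).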
TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
nullptr);
SDValue LoCmp, HiCmp;
if (TLI.isTypeLegal(LHSLo.getValueType()) &&
TLI.isTypeLegal(RHSLo.getValueType()))
LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
RHSLo, LowCC, false, DagCombineInfo, dl);
if (!LoCmp.getNode())
LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
RHSLo, LowCC);
if (TLI.isTypeLegal(LHSHi.getValueType()) &&
TLI.isTypeLegal(RHSHi.getValueType()))
HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
RHSHi, CCCode, false, DagCombineInfo, dl);
if (!HiCmp.getNode())
HiCmp =
DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, DAG.getCondCode(CCCode));
ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
(!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
(LoCmpC && LoCmpC->isNullValue())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
NewLHS = HiCmp;
NewRHS = SDValue();
return;
}
if (LHSHi == RHSHi) {
// Comparing the low bits is enough.
NewLHS = LoCmp;
NewRHS = SDValue();
return;
}
// Lower with SETCCCARRY if the target supports it.
EVT HiVT = LHSHi.getValueType();
EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
// FIXME: Make all targets support this, then remove the other lowering.
if (HasSETCCCARRY) {
// SETCCCARRY can detect < and >= directly. For > and <=, flip
// operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
default: break;
}
if (FlipOperands) {
std::swap(LHSLo, RHSLo);
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
// SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
// part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
// zero or positive iff LHS >= RHS.
EVT LoVT = LHSLo.getValueType();
SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
LHSHi, RHSHi, LowCmp.getValue(1),
DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
}
NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
false, DagCombineInfo, dl);
if (!NewLHS.getNode())
NewLHS =
DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
// Update N to have the operands specified.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
CCCode = ISD::SETNE;
}
// Update N to have the operands specified.
return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, use it.
if (!NewRHS.getNode()) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
return NewLHS;
}
// Otherwise, update N to have the operands specified.
return SDValue(
DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Carry = N->getOperand(2);
SDValue Cond = N->getOperand(3);
SDLoc dl = SDLoc(N);
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedInteger(LHS, LHSLo, LHSHi);
GetExpandedInteger(RHS, RHSLo, RHSHi);
// Expand to a SUBE for the low part and a smaller SETCCCARRY for the high.
SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
LowCmp.getValue(1), Cond);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SPLAT_VECTOR(SDNode *N) {
// Split the operand and replace with SPLAT_VECTOR_PARTS.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
return DAG.getNode(ISD::SPLAT_VECTOR_PARTS, SDLoc(N), N->getValueType(0), Lo,
Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
// upper half of the shift amount is zero. Just use the lower half.
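// E.g. shifting an i32 by an expanded i64 amount: a nonzero upper half means
// the amount is at least 2^32, far beyond the bit width, so the result is
// undefined anyway and the low half of the amount suffices.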
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(1), Lo, Hi);
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
// The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
// which causes problems on 8/16-bit targets. Just truncate the constant to a
// valid type.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
if (!IsStrict)
return Tmp.first;
ReplaceValueWith(SDValue(N, 1), Tmp.second);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->isAtomic()) {
// It's typical to have larger CAS than atomic store instructions.
SDLoc dl(N);
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
N->getMemoryVT(),
N->getOperand(0), N->getOperand(2),
N->getOperand(1),
N->getMemOperand());
return Swap.getValue(1);
}
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
assert(OpNo == 1 && "Can only expand the stored value so far");
EVT VT = N->getOperand(1).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
assert(NVT.isByteSized() && "Expanded type not byte sized!");
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), N->getOriginalAlign(), MMOFlags,
AAInfo);
}
if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
// Big-endian - high bits are at low addresses. Favor aligned stores at
// the cost of some bit-fiddling.
GetExpandedInteger(N->getValue(), Lo, Hi);
EVT ExtVT = N->getMemoryVT();
unsigned EBytes = ExtVT.getStoreSize();
unsigned IncrementSize = NVT.getSizeInBits()/8;
unsigned ExcessBits = (EBytes - IncrementSize)*8;
EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
ExtVT.getSizeInBits() - ExcessBits);
if (ExcessBits < NVT.getSizeInBits()) {
// Transfer high bits from the top of Lo to the bottom of Hi.
Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout())));
Hi = DAG.getNode(
ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SRL, dl, NVT, Lo,
DAG.getConstant(ExcessBits, dl,
TLI.getPointerTy(DAG.getDataLayout()))));
}
// Store both the high bits and maybe some of the low bits.
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
// Just truncate the low part of the source.
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
}
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
if (!IsStrict)
return Tmp.first;
ReplaceValueWith(SDValue(N, 1), Tmp.second);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDLoc dl(N);
SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
cast<AtomicSDNode>(N)->getMemoryVT(),
N->getOperand(0),
N->getOperand(1), N->getOperand(2),
cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
// TODO: We may be able to use this for types other than scalable
// vectors and fix those tests that expect BUILD_VECTOR to be used
if (OutVT.isScalableVector()) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
// Collect the (promoted) operands
SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx };
EVT PromEltVT = Ops[0].getValueType().getVectorElementType();
assert(PromEltVT.bitsLE(NOutVTElem) &&
"Promoted operand has an element type greater than result");
EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT);
SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops);
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
}
}
if (OutVT.isScalableVector())
report_fatal_error("Unable to promote scalable types using BUILD_VECTOR");
SDValue InOp0 = N->getOperand(0);
if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
InOp0 = GetPromotedInteger(N->getOperand(0));
EVT InVT = InOp0.getValueType();
unsigned OutNumElems = OutVT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
// Extract the element from the original vector.
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
InVT.getVectorElementType(), N->getOperand(0), Index);
SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
// Insert the converted element to the new vector.
Ops.push_back(Op);
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
SDLoc dl(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
SDValue Idx = N->getOperand(2);
EVT SubVecVT = SubVec.getValueType();
EVT NSubVT =
EVT::getVectorVT(*DAG.getContext(), NOutVT.getVectorElementType(),
SubVecVT.getVectorElementCount());
Vec = GetPromotedInteger(Vec);
SubVec = DAG.getNode(ISD::ANY_EXTEND, dl, NSubVT, SubVec);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NOutVT, Vec, SubVec, Idx);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
EVT OutVT = V0.getValueType();
return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
EVT VT = N->getValueType(0);
SDLoc dl(N);
ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
EVT OutVT = V0.getValueType();
return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
unsigned NumElems = N->getNumOperands();
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
Ops.reserve(NumElems);
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op;
// BUILD_VECTOR integer operand types are allowed to be larger than the
// result's element type. This may still be true after the promotion. For
// example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
// (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
else
Op = N->getOperand(i);
Ops.push_back(Op);
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDLoc dl(N);
assert(!N->getOperand(0).getValueType().isVector() &&
"Input must be a scalar");
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
SDLoc dl(N);
SDValue SplatVal = N->getOperand(0);
assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "Type must be promoted to a vector type");
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "Type must be promoted to a vector type");
APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT OutElemTy = NOutVT.getVectorElementType();
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
// Gather the elements from each input vector, converting them to the
// promoted output element type.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
SDValue Op = N->getOperand(i);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
Op = GetPromotedInteger(Op);
EVT SclrTy = Op.getValueType().getVectorElementType();
assert(NumElem == Op.getValueType().getVectorNumElements() &&
"Unexpected number of elements");
for (unsigned j = 0; j < NumElem; ++j) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
DAG.getVectorIdxConstant(j, dl));
Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.isVector() && "This type must be promoted to a vector type");
SDLoc dl(N);
// For operands whose TypeAction is to promote, extend the promoted node
// appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
// type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
// type.
if (getTypeAction(N->getOperand(0).getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue Promoted;
switch(N->getOpcode()) {
case ISD::SIGN_EXTEND_VECTOR_INREG:
Promoted = SExtPromotedInteger(N->getOperand(0));
break;
case ISD::ZERO_EXTEND_VECTOR_INREG:
Promoted = ZExtPromotedInteger(N->getOperand(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
Promoted = GetPromotedInteger(N->getOperand(0));
break;
default:
llvm_unreachable("Node has unexpected Opcode");
}
return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
}
// Directly extend to the appropriate transform-to type.
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
NOutVTElem, N->getOperand(1));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
V0, ConvElem, N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// The VECREDUCE result size may be larger than the element size, so
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
TLI.getVectorIdxTy(DAG.getDataLayout()));
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
V0->getValueType(0).getScalarType(), V0, V1);
// EXTRACT_VECTOR_ELT can return types which are wider than the incoming
// element types. If this is the case then we need to expand the outgoing
// value and not truncate it.
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
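// Promote the source vector, extract the subvector with the promoted element
// type, and truncate the result back to the original subvector type.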
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
MVT InVT = V0.getValueType().getSimpleVT();
MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
N->getValueType(0).getVectorNumElements());
SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1));
return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
EVT ResVT = N->getValueType(0);
unsigned NumElems = N->getNumOperands();
if (ResVT.isScalableVector()) {
SDValue ResVec = DAG.getUNDEF(ResVT);
for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
SDValue Op = N->getOperand(OpIdx);
unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
}
return ResVec;
}
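// For fixed-length vectors, promote each operand, extract its elements,
// truncate them back to the original element type, and rebuild the result
// with BUILD_VECTOR.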
EVT RetSclrTy = N->getValueType(0).getVectorElementType();
SmallVector<SDValue, 8> NewOps;
NewOps.reserve(NumElems);
// For each incoming vector
for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
DAG.getVectorIdxConstant(i, dl));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
}
return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a08548393979..bd2ebfd0bd3b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,11153 +1,11155 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;
#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
static cl::opt<bool>
InsertAssertAlign("insert-assert-align", cl::init(true),
cl::desc("Insert the experimental `assertalign` node."),
cl::ReallyHidden);
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
"for some float libcalls"),
cl::location(LimitFloatPrecision), cl::Hidden,
cl::init(0));
static cl::opt<unsigned> SwitchPeelThreshold(
"switch-peel-threshold", cl::Hidden, cl::init(66),
cl::desc("Set the case probability threshold for peeling the case from a "
"switch statement. A value greater than 100 will void this "
"optimization"));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
PartVT, ValueVT, CC))
return Val;
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
unsigned RoundParts =
(NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
SDValue Lo, Hi;
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
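// For example, when assembling an i96 value from three i32 parts, the two
// round parts were combined into an i64 above; the remaining i32 part is
// any-extended to i96, shifted left by 64 bits, and OR'd with the
// zero-extended low half below.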
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi =
DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getPointerTy(DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
// There is now one part, held in Val. Correct it to match ValueVT.
// PartEVT is the type of the register class that holds the value.
// ValueVT is the type of the inline asm operation.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
ValueVT.bitsLT(PartEVT)) {
// For an FP value in an integer part, we need to truncate to the right
// width first.
PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
}
// Handle types that have the same size.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle types with different sizes.
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extended.
if (AssertOp.hasValue())
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
// Handle MMX to a narrower integer type by bitcasting MMX to integer and
// then truncating.
if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
ValueVT.bitsLT(PartEVT)) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const Twine &ErrMsg) {
const Instruction *I = dyn_cast_or_null<Instruction>(V);
if (!V)
return Ctx.emitError(ErrMsg);
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
}
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.hasValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT.getSizeInBits() ==
Parts[0].getSimpleValueType().getSizeInBits() &&
"Part type sizes don't match!");
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
IntermediateVT.isVector()
? EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(),
IntermediateVT.getVectorElementCount() * NumParts)
: EVT::getVectorVT(*DAG.getContext(),
IntermediateVT.getScalarType(),
NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getVectorIdxConstant(0, DL));
}
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
if (ValueVT.getVectorNumElements() != 1) {
// Certain ABIs require that vectors are passed as integers. If the vectors
// are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
} else if (ValueVT.bitsLT(PartEVT)) {
const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
// Drop the extra bits.
Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
return DAG.getBitcast(ValueVT, Val);
}
diagnosePossiblyInvalidConstraint(
*DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
else
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
}
return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
CallConv))
return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
if (NumParts == 0)
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
}
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
if (ValueVT.isFloatingPoint()) {
// FP values need to be bitcast, then extended if they are being put
// into a larger container.
ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover fewer bits than the value has, truncate the value.
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
ValueVT = Val.getValueType();
assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
if (PartEVT != ValueVT) {
diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
"scalar-to-vector conversion failed");
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
Parts[0] = Val;
return;
}
// Expand the value into multiple parts.
if (NumParts & (NumParts - 1)) {
// The number of parts is not a power of 2. Split off and copy the tail.
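// For example, splitting an i96 value into three i32 parts: the top 32 bits
// are shifted down and emitted as the single odd part here, then the value
// is truncated to i64 and handled by the power-of-2 bisection below.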
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
unsigned RoundParts = 1 << Log2_32(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
NumParts = RoundParts;
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
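// For example, with four i32 parts of an i128 value, the first pass stores
// the two i64 halves at Parts[0] and Parts[2]; the second pass splits each
// half into its two i32 parts.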
Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
for (unsigned i = 0; i < NumParts; i += StepSize) {
unsigned ThisBits = StepSize * PartBits / 2;
EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
SDValue &Part0 = Parts[i];
SDValue &Part1 = Parts[i+StepSize/2];
Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
if (ThisBits == PartBits && ThisVT != PartVT) {
Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts, Parts + OrigNumParts);
}
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL, EVT PartVT) {
if (!PartVT.isVector())
return SDValue();
EVT ValueVT = Val.getValueType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
// We only support widening vectors with equivalent element types and
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
return SDValue();
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
SDValue EltUndef = DAG.getUNDEF(ElementVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
return DAG.getBuildVector(PartVT, DL, Ops);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const bool IsABIRegCopy = CallConv.hasValue();
if (NumParts == 1) {
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
PartEVT.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
uint64_t ValueSize = ValueVT.getFixedSizeInBits();
assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
}
assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
Parts[0] = Val;
return;
}
// Handle a multi-element vector.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
} else if (SDValue Widened =
widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
Val = Widened;
} else if (BuiltVectorTy.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
BuiltVectorTy.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
// This does something sensible for scalable vectors - see the
// definition of EXTRACT_SUBVECTOR for further details.
unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
Ops[i] =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i, DL));
}
}
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each value into
// legal parts.
assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
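// For each legal value type, record the register type, the number of
// registers needed, and the consecutive registers (starting at Reg) that
// will hold it.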
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
SDValue *Flag, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(),
CallConv.getValue(), RegVTs[Value])
: RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
if (!Flag) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
*Flag = P.getValue(2);
}
Chain = P.getValue(1);
Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
if (!LOI)
continue;
unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
// Explicitly express that as it would be easier for
// optimizations to kick in.
Parts[i] = DAG.getConstant(0, dl, RegisterVT);
continue;
}
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
bool isSExt;
EVT FromVT(MVT::Other);
if (NumZeroBits) {
FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
isSExt = false;
} else if (NumSignBits > 1) {
FromVT =
EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
isSExt = true;
} else {
continue;
}
// Add an assertion node.
assert(FromVT != MVT::Other);
Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the value's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(),
CallConv.getValue(), RegVTs[Value])
: RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
// Copy the parts into the registers.
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
if (!Flag) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
*Flag = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
if (NumRegs == 1 || Flag)
// If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
// flagged to it. That is, the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
// user as well as a successor (the TF operands are flagged to the user).
// c1, f1 = CopyToReg
// c2, f2 = CopyToReg
// c3 = TokenFactor c1, c2
// ...
// = op c3, ..., f2
Chain = Chains[NumRegs-1];
else
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
// Don't do this for tied operands that can use the regclass information
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
if (Code == InlineAsm::Kind_Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
assert(
(Regs[I] != SP ||
DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
"If we clobbered the stack pointer, MFI should know about it.");
}
return;
}
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
MVT RegisterVT = RegVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
RegisterVT);
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
}
}
}
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
return OutVec;
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
GFI = gfi;
LibInfo = li;
DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
StatepointLowering.clear();
}
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
if (Pending.empty())
return Root;
// Add current root to PendingChains, unless we already indirectly
// depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
assert(Pending[i].getNode()->getNumOperands() > 1);
if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
Pending.push_back(Root);
}
if (Pending.size() == 1)
Root = Pending[0];
else
Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
DAG.setRoot(Root);
Pending.clear();
return Root;
}
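// The root accessors below differ in which pending chains they fold into the
// DAG root:
//  - getMemoryRoot flushes PendingLoads only.
//  - getRoot additionally folds in all pending constrained FP chains.
//  - getControlRoot flushes PendingExports together with the fpexcept.strict
//    constrained chains.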
SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}
SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
// then calling getMemoryRoot().
PendingLoads.reserve(PendingLoads.size() +
PendingConstrainedFP.size() +
PendingConstrainedFPStrict.size());
PendingLoads.append(PendingConstrainedFP.begin(),
PendingConstrainedFP.end());
PendingLoads.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
return getMemoryRoot();
}
SDValue SelectionDAGBuilder::getControlRoot() {
// We need to emit pending fpexcept.strict constrained intrinsics,
// so append them to the PendingExports list.
PendingExports.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFPStrict.clear();
return updateRoot(PendingExports);
}
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
}
void SelectionDAGBuilder::visitPHI(const PHINode &) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
}
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
DebugLoc DL, unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
for (const Value *V : DI->getValues()) {
auto Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
/*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
// which we should ideally fill with an extra Undef DBG_VALUE.
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
const DbgValueInst *DI = DDI.getDI();
DIVariable *DanglingVariable = DI->getVariable();
DIExpression *DanglingExpr = DI->getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
return true;
}
return false;
};
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
// If debug info is to be dropped, run it through final checks to see
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
erase_if(DDIV, isMatchingDbgValue);
}
}
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
return;
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
assert(DI && "Ill-formed DanglingDebugInfo");
DebugLoc dl = DDI.getdl();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
// FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
// FuncArgumentDbgValue (it would be hoisted to the function entry, and if
// we couldn't resolve it directly when examining the DbgValue intrinsic
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
// after ISel. An alternative could be to teach
// ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
SDV = getDbgValue(Val, Variable, Expr, dl,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
} else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need to know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
assert(!DDI.getDI()->hasArgList() &&
"Not implemented for variadic dbg_values");
Value *V = DDI.getDI()->getValue(0);
DILocalVariable *Var = DDI.getDI()->getVariable();
DIExpression *Expr = DDI.getDI()->getExpression();
DebugLoc DL = DDI.getdl();
DebugLoc InstDL = DDI.getDI()->getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value be encoded without any further work?
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<Value *, 4> AdditionalValues;
DIExpression *SalvagedExpr =
salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
if (!SalvagedExpr || !AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
V = VAsInst.getOperand(0);
Expr = SalvagedExpr;
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
<< DDI.getDI() << "\nBy stripping back to:\n " << V);
return;
}
}
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc dl,
DebugLoc InstDL, unsigned Order,
bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
SmallVector<SDNode *> Dependencies;
for (const Value *V : Values) {
// Constant value.
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
isa<ConstantPointerNull>(V)) {
LocationOps.emplace_back(SDDbgOperand::fromConst(V));
continue;
}
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
auto SI = FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
continue;
}
}
// Do not use getValue() in here; we don't want to generate code at
// this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
// describe stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of
// interesting debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
Dependencies.push_back(N.getNode());
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
continue;
}
LocationOps.emplace_back(
SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
continue;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Special rules apply for the first dbg.values of parameter variables in a
// function. Identify them by the fact that they reference Argument values,
// that they describe parameters, and that those parameters belong to the
// current function. We need to let them dangle until they get an SDNode.
bool IsParamOfFunc =
isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
if (IsParamOfFunc)
return false;
// The value is not used in this block yet (or it would have an SDNode).
// We still want the value to appear for the user if possible -- if it has
// an associated VReg, we can refer to that instead.
auto VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
unsigned Reg = VMI->second;
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
return false;
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
if (auto VarSize = Var->getSizeInBits())
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
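// Illustrative example: a 64-bit variable held in two 32-bit registers is
// described by two fragments built in the loop below,
// DW_OP_LLVM_fragment(offset 0, size 32) on the first register and
// DW_OP_LLVM_fragment(offset 32, size 32) on the second.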
for (auto RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
// TODO: handle scalable vectors.
unsigned RegisterSize = RegAndSize.second;
unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
? BitsToDescribe - Offset
: RegisterSize;
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, FragmentSize);
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
return true;
}
// We can use simple vreg locations for variadic dbg_values as well.
LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
continue;
}
// We failed to create a SDDbgOperand for V.
return false;
}
// We have created a SDDbgOperand for each Value in Values.
// Should use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
SDDbgValue *SDV =
DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
/*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
salvageUnresolvedDbgValue(DDI);
clearDanglingDebugInfo();
}
/// getCopyFromRegs - If there was a virtual register allocated for the value V,
/// emit a CopyFromReg of the specified type Ty. Return an empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
resolveDanglingDebugInfo(V, Result);
}
return Result;
}
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
// have a regular SDValue.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
// If there's a virtual register allocated and initialized for this
// value, use it.
if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
return copyFromReg;
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
// as constant expressions inside PHI nodes.
N->setDebugLoc(DebugLoc());
}
return N;
}
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
return DAG.getConstant(0, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
if (match(C, m_VScale(DAG.getDataLayout())))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
SmallVector<SDValue, 4> Constants;
for (const Use &U : C->operands()) {
SDNode *Val = getValue(U).getNode();
// If the operand is an empty aggregate, there are no values.
if (!Val) continue;
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Constants.push_back(SDValue(Val, i));
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
// Add each leaf value from the element to the Ops list to form a
// flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Ops.push_back(SDValue(Val, i));
}
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
SmallVector<SDValue, 4> Constants(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
EVT EltVT = ValueVTs[i];
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
SmallVector<SDValue, 16> Ops;
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
} else if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
else {
SmallVector<SDValue, 16> Ops;
Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
}
llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frame index instead of
// recomputing the address.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
Inst->getType(), None);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
}
llvm_unreachable("Can't get register for value!");
}
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Update machine-CFG edge.
MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
FuncInfo.MBB->addSuccessor(TargetMBB);
TargetMBB->setIsEHCatchretTarget(true);
DAG.getMachineFunction().setHasEHCatchret(true);
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsSEH = isAsynchronousEHPersonality(Pers);
if (IsSEH) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
}
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BB's.
// A 'catchret' returns to the outer scope's color.
Value *ParentPad = I.getCatchSwitchParentPad();
const BasicBlock *SuccessorColor;
if (isa<ConstantTokenNone>(ParentPad))
SuccessorColor = &FuncInfo.Fn->getEntryBlock();
else
SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
// Create the terminator node.
SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB),
DAG.getBasicBlock(SuccessorColorMBB));
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
if (Pers != EHPersonality::Wasm_CXX) {
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
}
// In wasm EH, even though a catchpad may not catch an exception if a tag does
// not match, it is OK to add only the first unwind destination catchpad to the
// successors, because there will be at least one invoke instruction within the
// catch scope that points to the next unwind destination, if one exists, so
// CFGSort cannot mess up the BB sorting order.
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
// call within them, and catchpads only consisting of 'catch (...)' have a
// '__cxa_end_catch' call within them, both of which generate invokes in case
// the next unwind destination exists, i.e., the next unwind destination is not
// the caller.)
//
// Having at most one EH pad successor is also simpler and helps later
// transformations.
//
// For example,
// current:
// invoke void @foo to ... unwind label %catch.dispatch
// catch.dispatch:
// %0 = catchswitch within ... [label %catch.start] unwind label %next
// catch.start:
// ...
// ... in this BB or some other child BB dominated by this BB there will be an
// invoke that points to 'next' BB as an unwind destination
//
// next: ; We don't need to add this to 'current' BB's successor
// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations. We don't
// continue to the unwind destination of the catchswitch for wasm.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
}
break;
} else {
continue;
}
}
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
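/// For example (an illustrative IR sketch):
///   invoke void @f() to label %cont unwind label %dispatch
/// dispatch:
///   %cs = catchswitch within none [label %catch.a, label %catch.b]
///           unwind to caller
/// Both %catch.a and %catch.b are added to UnwindDests, each with the
/// incoming edge probability, since either handler may receive the exception.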
static void findUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality =
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
if (IsWasmCXX) {
findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
assert(UnwindDests.size() <= 1 &&
"There should be at most one unwind destination for wasm");
return;
}
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
if (isa<LandingPadInst>(Pad)) {
// Stop on landingpads. They are not funclets.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
break;
} else if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
if (!IsSEH)
UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
continue;
}
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (BPI && NewEHPadBB)
Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
EHPadBB = NewEHPadBB;
}
}
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
// Update successor info.
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
auto UnwindDest = I.getUnwindDest();
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability UnwindDestProb =
(BPI && UnwindDest)
? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
}
FuncInfo.MBB->normalizeSuccProbs();
// Create the terminator node.
SDValue Ret =
DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
report_fatal_error("visitCatchSwitch not yet implemented!");
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
// Calls to @llvm.experimental.deoptimize don't generate a return value, so
// lower
//
// %val = call <ty> @llvm.experimental.deoptimize()
// ret <ty> %val
//
// differently.
if (I.getParent()->getTerminatingDeoptimizeCall()) {
LowerDeoptimizingReturn();
return;
}
if (!FuncInfo.CanLowerReturn) {
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
F->getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
&Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(
Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
const Function *F = I.getParent()->getParent();
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
I.getOperand(0)->getType(), F->getCallingConv(),
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
bool RetInReg = F->getAttributes().hasAttribute(
AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
CallingConv::ID CC = F->getCallingConv();
unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
if (RetInReg)
Flags.setInReg();
if (I.getOperand(0)->getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
}
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (j == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
}
}
// Push the swifterror virtual register in as the last element of Outs. This
// makes sure the swifterror virtual register will be returned in the
// swifterror physical register.
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
EVT(TLI.getPointerTy(DL)) /*argvt*/,
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
&I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
"LowerReturn didn't return a valid chain!");
// Update the DAG with the new chain value resulting from return lowering.
DAG.setRoot(Chain);
}
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
// Skip empty types
if (V->getType()->isEmptyTy())
return;
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
unsigned Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
// Is already exported, noop.
return FuncInfo.isExportedInst(V);
}
// If this is an argument, we can export it if the BB is the entry block or
// if it is already exported.
if (isa<Argument>(V)) {
if (FromBB->isEntryBlock())
return true;
// Otherwise, can only export this if it is already exported.
return FuncInfo.isExportedInst(V);
}
// Otherwise, constants can always be exported.
return true;
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
BranchProbability
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
if (!BPI) {
// If BPI is not available, set the default probability to 1 / N, where N is
// the number of successors.
auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
}
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
MachineBasicBlock *Dst,
BranchProbability Prob) {
if (!FuncInfo.BPI)
Src->addSuccessorWithoutProb(Dst);
else {
if (Prob.isUnknown())
Prob = getEdgeProbability(Src, Dst);
Src->addSuccessor(Dst, Prob);
}
}
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
return true;
}
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred =
InvertCond ? IC->getInversePredicate() : IC->getPredicate();
Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
return;
}
}
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
// Skip over a NOT that wraps the condition and remember to invert the op and
// operands at the next level.
Value *NotCond;
if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
InBlock(NotCond, CurBB->getBasicBlock())) {
FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
!InvertCond);
return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::And
: (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::Or
: (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
else if (BOpc == Instruction::Or)
BOpc = Instruction::And;
}
}
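// At this point BOpc is Instruction::And or Instruction::Or when Cond matched
// a logical and/or pattern (including the select-based forms matched by
// m_LogicalAnd / m_LogicalOr, with And and Or swapped if InvertCond is set),
// and (Instruction::BinaryOps)0 otherwise.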
// If this node is not part of the or/and tree, emit it as a branch.
// Note that all nodes in the tree should have same opcode.
bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOpOp0, CurBB->getBasicBlock()) ||
!InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
}
// Create TmpBB after CurBB.
MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
if (Opc == Instruction::Or) {
// Codegen X | Y as:
// BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
// A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
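// A worked illustration (numbers chosen for this comment): with A = 3/8 and
// B = 5/8, BB1 gets TrueProb = 3/16 and FalseProb = 13/16, and TmpBB gets the
// normalized pair {3/16, 5/8} = {3/13, 10/13}; indeed
// 3/16 + (13/16) * (3/13) = 3/8 = A.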
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
// BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// This requires creation of TmpBB after CurBB.
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
// 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
// TrueProb for BB1 * FalseProb for TmpBB.
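// A worked illustration (numbers chosen for this comment): with A = 3/8 and
// B = 5/8, BB1 gets TrueProb = 11/16 and FalseProb = 5/16, and TmpBB gets the
// normalized pair {3/8, 5/16} = {6/11, 5/11}; indeed
// 5/16 + (11/16) * (5/11) = 5/8 = B.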
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
}
}
/// If the set of cases should be emitted as a series of branches, return true.
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
if (Cases.size() != 2) return true;
// If this is two comparisons of the same values or'd or and'd together, they
// will get folded into a single comparison, so don't emit two blocks.
if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
Cases[0].CmpRHS == Cases[1].CmpRHS) ||
(Cases[0].CmpRHS == Cases[1].CmpLHS &&
Cases[0].CmpLHS == Cases[1].CmpRHS)) {
return false;
}
// Handle: (X != null) | (Y != null) --> (X|Y) != 0
// Handle: (X == null) & (Y == null) --> (X|Y) == 0
if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
Cases[0].CC == Cases[1].CC &&
isa<Constant>(Cases[0].CmpRHS) &&
cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
return false;
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
return true;
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
return;
}
// If this condition is one of the special cases we handle, do special stuff
// now.
const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
// unpredictable branches, and vector extracts because those jumps are likely
// expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
// cmp D, E
// F = setle
// or C, F
// jnz foo
// Emit:
// cmp A, B
// je foo
// cmp D, E
// jle foo
const Instruction *BOp = dyn_cast<Instruction>(CondVal);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
Value *Vec;
const Value *BOp0, *BOp1;
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::And;
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SL->SwitchCases)) {
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
visitSwitchCase(SL->SwitchCases[0], BrMBB);
SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
SL->SwitchCases.clear();
}
}
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
if (CB.CC == ISD::SETTRUE) {
// Branch or fall through to TrueBB.
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
SwitchBB->normalizeSuccProbs();
if (CB.TrueBB != NextBlock(SwitchBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
DAG.getBasicBlock(CB.TrueBB)));
}
return;
}
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
CB.CC == ISD::SETEQ)
Cond = CondLHS;
else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else {
SDValue CondRHS = getValue(CB.CmpRHS);
// If a pointer's DAG type is larger than its memory type then the DAG
// values are zero-extended. This breaks signed comparisons so truncate
// back to the underlying type before doing the compare.
if (CondLHS.getValueType() != MemVT) {
CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
}
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
}
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, dl, VT));
Cond = DAG.getSetCC(dl, MVT::i1, SUB,
DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
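// For instance (illustrative): a case range [5, 9] becomes the unsigned
// check (X - 5) u<= 4, covering the whole range with a single comparison.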
}
}
// Update successor info
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
}
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
// Insert the false branch. Do this even if it's a fall through branch,
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
MVT::Other, Index.getValue(1),
Table, Index);
DAG.setRoot(BrJumpTable);
}
/// visitJumpTableHeader - This function emits the code needed to produce the
/// index into the JumpTable from the value being switched on.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
DAG.getConstant(JTH.First, dl, VT));
// The SDNode we just created, which holds the value being switched on minus
// the smallest case value, needs to be copied to a virtual register so it
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// may therefore require extension or truncation.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
unsigned JumpTableReg =
FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
if (!JTH.OmitRangeCheck) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
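// For example (illustrative): with cases covering 10..13, Sub is (x - 10)
// and the comparison below branches to the default block when Sub u> 3.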
SDValue CMP = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
DAG.setRoot(BrCond);
} else {
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
DAG.getBasicBlock(JT.MBB)));
else
DAG.setRoot(CopyTo);
}
}
/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
/// variable if one exists.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
if (TLI.useStackGuardXorFP())
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve the guard check function; it is null if the instrumentation is inlined.
if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(DAG.getEntryNode())
.setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
// Otherwise, emit a volatile load to retrieve the stack guard value.
SDValue Chain = DAG.getEntryNode();
if (TLI.useLoadStackGuardNode()) {
Guard = getLoadStackGuard(DAG, dl, Chain);
} else {
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
MachinePointerInfo(IRGuard, 0), Align,
MachineMemOperand::MOVolatile);
}
// Perform the comparison via a getsetcc.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
Guard.getValueType()),
Guard, GuardVal, ISD::SETNE);
// If the guard and the stack slot value are not equal, branch to the failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
MVT::Other, BrCond,
DAG.getBasicBlock(SPD.getSuccessMBB()));
DAG.setRoot(Br);
}
/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
// On PS4, the "return address" must still be within the calling function,
// even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS4CPU())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
if (TM.getTargetTriple().isWasm())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
/// visitBitTestHeader - This function emits the code needed to produce a value
/// suitable for "bit tests".
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
SDValue RangeSub =
DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
} else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case ranges are encoded into a series of masks.
// Just use the pointer type; it's guaranteed to fit.
UsePtrType = true;
break;
}
}
SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
B.RegVT = VT.getSimpleVT();
B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
if (!B.OmitRangeCheck)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
if (!B.OmitRangeCheck) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
RangeSub.getValueType()),
RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
ISD::SETUGT);
Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
DAG.getBasicBlock(B.Default));
}
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
DAG.setRoot(Root);
}
/// visitBitTestCase - This function produces one "bit test".
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
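// For example (illustrative): with Mask == 0b0100, countTrailingZeros(Mask)
// is 2, so the test below is simply "ShiftOp == 2".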
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
DAG.getConstant(1, dl, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, dl,
VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
// The branch probability from SwitchBB to NextMBB is BranchProbToNext.
addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
// It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
// one, as they are relative probabilities (and thus work more like weights),
// and hence we need to normalize them so that their sum becomes one.
SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
// Avoid emitting unnecessary branches to the next block.
if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
DAG.setRoot(BrAnd);
}
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve the successors, looking through artificial IR-level blocks like
// catchswitch.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(I, EHPadBB);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
llvm_unreachable("Cannot invoke this intrinsic");
case Intrinsic::donothing:
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
SmallVector<SDValue, 8> Ops;
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
break;
}
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during the call to LowerStatepoint.
if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability EHPadBBProb =
BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
// Update successor info.
addSuccessorWithProb(InvokeMBB, Return);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
}
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
visitInlineAsm(I);
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
Target->setIsInlineAsmBrIndirectTarget();
}
CallBrMBB->normalizeSuccProbs();
// Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
return;
// If landingpad's return type is token type, we don't create DAG nodes
// for its exception pointer and selector value. The extraction of exception
// pointer or selector value from token type landingpads is not currently
// supported.
if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
SDLoc dl = getCurSDLoc();
ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionPointerVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[0]);
} else {
Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
}
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionSelectorVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
if (SL->JTCases[i].first.HeaderBB == First)
SL->JTCases[i].first.HeaderBB = Last;
// Update BitTestCases.
for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
if (SL->BitTestCases[i].Parent == First)
SL->BitTestCases[i].Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithProb(IndirectBrMBB, Succ);
}
IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
getValue(I.getAddress())));
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (!DAG.getTarget().Options.TrapUnreachable)
return;
// We may be able to ignore unreachable behind a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
const BasicBlock &BB = *I.getParent();
if (&I != &BB.front()) {
BasicBlock::const_iterator PredI =
std::prev(BasicBlock::const_iterator(&I));
if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
if (Call->doesNotReturn())
return;
}
}
}
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
Op, Flags);
setValue(&I, UnNodeValue);
}
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
unsigned ShiftSize = ShiftTy.getSizeInBits();
unsigned Op2Size = Op2.getValueSizeInBits();
SDLoc DL = getCurSDLoc();
// If the operand is smaller than the shift count type, promote it.
if (ShiftSize > Op2Size)
Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
else
Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
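// Illustration (not from the original source; the shift-amount type is
// target-dependent and assumed to be i8 here):
//   %r = shl i64 %x, %amt        ; %amt has type i64 in IR
// Op2Size (64) exceeds ShiftSize (8), but 8 >= Log2_32_Ceil(64), so the
// amount is truncated to the i8 shift-amount type before the node is built.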
bool nuw = false;
bool nsw = false;
bool exact = false;
if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
if (const OverflowingBinaryOperator *OFBinOp =
dyn_cast<const OverflowingBinaryOperator>(&I)) {
nuw = OFBinOp->hasNoUnsignedWrap();
nsw = OFBinOp->hasNoSignedWrap();
}
if (const PossiblyExactOperator *ExactOp =
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
}
SDNodeFlags Flags;
Flags.setExact(exact);
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
Flags);
setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDNodeFlags Flags;
Flags.setExact(isa<PossiblyExactOperator>(&I) &&
cast<PossiblyExactOperator>(&I)->isExact());
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
predicate = ICmpInst::Predicate(IC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
// If a pointer's DAG type is larger than its memory type then the DAG values
// are zero-extended. This breaks signed comparisons so truncate back to the
// underlying type before doing the compare.
if (Op1.getValueType() != MemVT) {
Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
}
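// Illustration (hypothetical target where pointers are 32-bit in memory but
// carried zero-extended in 64-bit DAG values): a signed compare of two such
// pointers is performed on the truncated 32-bit values so the sign bit is
// interpreted correctly.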
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
// Check that every user of the select's condition is itself a select; only
// then can the feeding compare be folded away when forming a min/max.
static bool hasOnlySelectUsers(const Value *Cond) {
return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
SmallVector<SDValue, 1> BaseOps(1, Cond);
ISD::NodeType OpCode =
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
bool Negate = false;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
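// Illustration (not from the original source):
//   %c = icmp slt i32 %a, %b
//   %s = select i1 %c, i32 %a, i32 %b
// matchSelectPattern recognizes this as SPF_SMIN, so the select can be
// emitted as a single ISD::SMIN node when the target supports it.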
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
case SPF_NABS:
Negate = true;
LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
default: break;
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
// instruction, the consumed instructions won't be destroyed, so it is
// not profitable to convert to a min/max.
hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
BaseOps.clear();
}
}
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
SDLoc dl = getCurSDLoc();
EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
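// Illustration (assuming 64-bit pointers): 'ptrtoint i8* %p to i32'
// truncates, 'ptrtoint i8* %p to i128' zero-extends, and
// 'ptrtoint i8* %p to i64' is a no-op.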
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
EVT PtrMemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
// BitCast assures us that source and destination are the same size so this is
// either a BITCAST or a no-op.
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, dl,
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
// is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
/*isOpaque*/true));
else
setValue(&I, N); // noop cast.
}
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
}
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InIdx));
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
ArrayRef<int> Mask;
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
Mask = SVI->getShuffleMask();
else
Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
SDValue FirstElt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
// For now, we only handle splats for scalable vectors.
// The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
if (SrcNumElts < MaskNumElts) {
// The mask is longer than the source vectors. We can concatenate the source
// vectors to make the mask and vector lengths match.
if (MaskNumElts % SrcNumElts == 0) {
// Mask length is a multiple of the source vector length.
// Check if the shuffle is some kind of concatenation of the input
// vectors.
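// Illustration (not from the original source):
//   shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// has SrcNumElts == 2 and MaskNumElts == 4; each two-element piece of the
// mask reads sequentially from a single source, so this lowers to
// CONCAT_VECTORS(%a, %b).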
unsigned NumConcat = MaskNumElts / SrcNumElts;
bool IsConcat = true;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx < 0)
continue;
// Ensure the indices in each SrcVT sized piece are sequential and that
// the same source is used for the whole piece.
if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
(ConcatSrcs[i / SrcNumElts] >= 0 &&
ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
IsConcat = false;
break;
}
// Remember which source this index came from.
ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
// The shuffle is concatenating multiple vectors together. Just emit
// a CONCAT_VECTORS operation.
if (IsConcat) {
SmallVector<SDValue, 8> ConcatOps;
for (auto Src : ConcatSrcs) {
if (Src < 0)
ConcatOps.push_back(DAG.getUNDEF(SrcVT));
else if (Src == 0)
ConcatOps.push_back(Src1);
else
ConcatOps.push_back(Src2);
}
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
return;
}
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
PaddedMaskNumElts);
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1;
MOps2[0] = Src2;
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts - PaddedMaskNumElts;
MappedOps[i] = Idx;
}
SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
}
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
}
if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
// We can't use either concat vectors or extract subvectors, so fall back to
// replacing the shuffle with per-element extracts and a build vector.
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
Res = DAG.getUNDEF(EltVT);
} else {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
}
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const User &I) {
ArrayRef<unsigned> Indices;
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
Indices = IV->getIndices();
else
Indices = cast<ConstantExpr>(&I)->getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
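// Illustration (hypothetical IR): for
//   %r = insertvalue {i32, {float, float}} %agg, float %v, 1, 0
// the aggregate flattens to three scalar values and the indices {1, 0}
// map to LinearIndex == 1.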
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
// Ignore an insertvalue that produces an empty object
if (!NumAggValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
if (NumValValues) {
SDValue Val = getValue(Op1);
for (; i != LinearIndex + NumValValues; ++i)
Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
}
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const User &I) {
ArrayRef<unsigned> Indices;
if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
Indices = EV->getIndices();
else
Indices = cast<ConstantExpr>(&I)->getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
// Ignore an extractvalue that produces an empty object.
if (!NumValValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
// Copy out the selected value(s).
for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
Values[i - LinearIndex] =
OutOfUndef ?
DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValValueVTs), Values));
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers. Take the scalar
// element which holds a pointer.
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
: ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
}
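// Illustration (not from the original source): for
//   %p = getelementptr i32, i32* %base, <4 x i64> %idx
// the scalar %base has just been splatted to a vector-of-pointers DAG value
// so that the per-lane index arithmetic below operates on matching types.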
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
// IdxSize is the width of the arithmetic according to IR semantics.
// In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
const auto *C = dyn_cast<Constant>(Idx);
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
const auto *CI = dyn_cast_or_null<ConstantInt>(C);
if (CI && CI->isZero())
continue;
if (CI && !ElementScalable) {
APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal;
if (IsVectorGEP)
OffsVal = DAG.getConstant(
Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
else
OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
// N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
if (ElementScalable) {
EVT VScaleTy = N.getValueType().getScalarType();
SDValue VScale = DAG.getNode(
ISD::VSCALE, dl, VScaleTy,
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
if (IsVectorGEP)
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
} else {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
IdxN = DAG.getNode(ISD::SHL, dl,
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
}
}
N = DAG.getNode(ISD::ADD, dl,
N.getValueType(), N, IdxN);
}
}
MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
if (IsVectorGEP) {
PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
}
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
setValue(&I, N);
}
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// If this is a fixed sized alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
AllocSize,
DAG.getConstant(TySize, dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If it is greater than the stack
// alignment, we record it in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
Alignment = None;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(~StackAlignMask, dl, IntPtr));
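// Illustration: with a 16-byte stack alignment, an AllocSize of N becomes
// (N + 15) & ~15, i.e. N rounded up to the next multiple of 16.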
SDValue Ops[] = {
getRoot(), AllocSize,
DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return visitLoadFromSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return visitLoadFromSwiftError(I);
}
}
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
bool isVolatile = I.isVolatile();
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (NumValues > MaxParallelChains)
Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
}
SDLoc dl = getCurSDLoc();
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
MachineMemOperand::Flags MMOFlags
= TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
// TokenFactor places arbitrary choke points on the scheduler. SD scheduling
// could recover a bit by hoisting nodes upward in the chain by recognizing
// they are side-effect free or do not alias. The optimizer should really
// avoid this case by converting large object/array copies to llvm.memcpy
// (MaxParallelChains should always remain as a failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
PendingLoads.push_back(Chain);
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
!I.hasMetadata(LLVMContext::MD_nontemporal) &&
!I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
if (I.isAtomic())
return visitAtomicStore(I);
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
if (Arg->hasSwiftErrorAttr())
return visitStoreToSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
if (Alloca->isSwiftError())
return visitStoreToSwiftError(I);
}
}
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
// Get the lowered operands. Note that we do this after
// checking if NumValues is zero, because with zero values
// the operands won't have values in the map.
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue Add =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
SDValue St =
DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
// Get a uniform base for the Gather/Scatter intrinsic.
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
// We try to represent it as a base pointer + vector of indices.
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
// The first operand of the GEP may be a single pointer or a vector of pointers
// Example:
// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
// or
// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//
// When the first GEP operand is a single pointer, it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as the uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
C = C->getSplatValue();
if (!C)
return false;
Base = SDB->getValue(C);
ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || GEP->getParent() != CurBB)
return false;
if (GEP->getNumOperands() != 2)
return false;
const Value *BasePtr = GEP->getPointerOperand();
const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
// Make sure the base is scalar and the index is a vector.
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(
DL.getTypeAllocSize(GEP->getResultElementType()),
SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML;
if (VT.isScalableVector())
ML = MemoryLocation::getAfter(PtrOperand);
else
ML = MemoryLocation(PtrOperand, LocationSize::precise(
DAG.getDataLayout().getTypeStoreSize(I.getType())),
AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDLoc dl = getCurSDLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
getValue(I.getPointerOperand()), getValue(I.getValOperand()),
MMO);
SDValue OutChain = L.getValue(1);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for loads to prevent future divergence.
SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
SDValue OutChain = L.getValue(1);
if (!I.isUnordered())
DAG.setRoot(OutChain);
else
PendingLoads.push_back(OutChain);
return;
}
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
// Ignore the callsite's attributes. A specific call site may be marked with
// readnone, but the lowering code will expect the chain based on the
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
if (OnlyLoad) {
// We don't need to serialize loads against other loads.
Ops.push_back(DAG.getRoot());
} else {
Ops.push_back(getRoot());
}
}
  // Info is set by getTgtMemIntrinsic.
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
DAG.getMachineFunction(),
Intrinsic);
  // Add the intrinsic ID as an integer operand if it's not a target memory
  // intrinsic with a custom opcode; generic INTRINSIC_VOID/INTRINSIC_W_CHAIN
  // nodes still need the ID.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
SDVTList VTs = DAG.getVTList(ValueVTs);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
    // This is a target intrinsic that touches memory.
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
if (HasChain) {
SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
if (OnlyLoad)
PendingLoads.push_back(Chain);
else
DAG.setRoot(Chain);
}
if (!I.getType()->isVoidTy()) {
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
} else
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
if (!Alignment)
Alignment = F->getAttributes().getRetAlignment();
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
}
setValue(&I, Result);
}
}
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the i32 bit pattern of the floating-point value.
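///
/// For example, for Op = 0x40490FDB (the bit pattern of 3.14159274f) this
/// yields (0x490FDB | 0x3f800000) = 0x3FC90FDB, i.e. 1.5707964f: the
/// significand of Op scaled into [1, 2).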
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the i32 bit pattern of the floating-point value.
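///
/// For example, for Op = 0x40490FDB (the bit pattern of 3.14159274f) the
/// biased exponent field is 0x80 = 128, so the result is (float)(128 - 127)
/// = 1.0f, matching floor(log2(3.14159274)).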
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
  // IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
// FractionalPartOfX = t0 - (float)IntegerPartOfX;
SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
IntegerPartOfX = DAG.getNode(
ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
DAG.getDataLayout())));
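  // Shifting the integer part into bit position 23 places it in the exponent
  // field of an IEEE-754 single; adding it to the bitcast polynomial result
  // below scales that result by 2^IntegerPartOfX.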
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// TwoToFractionalPartOfX =
// 0.997535578f +
// (0.735607626f + 0.252464424f * x) * x;
//
// error 0.0144103317, which is 6 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3e814304, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
// 0.999892986f +
// (0.696457318f +
// (0.224338339f + 0.792043434e-1f * x) * x) * x;
//
// error 0.000107046256, which is 13 to 14 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3da235e3, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3e65b8f3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
// 0.999999982f +
// (0.693148872f +
// (0.240227044f +
// (0.554906021e-1f +
// (0.961591928e-2f +
// (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
// error 2.47208000*10^(-7), which is better than 18 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3924b03e, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3ab24b87, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3c1d8c17, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3d634a1d, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3e75fe14, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234, dl));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000, dl));
}
// Add the exponent into the result in integer domain.
SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
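/// In limited-precision mode the identity exp(x) = 2^(x * log2(e)) is used so
/// the exp2 polynomial expansion above can be reused.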
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// t0 = Op * log2(e)
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
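/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log(x) = e * ln(2) + log(m), where log(m) is evaluated with a minimax
/// polynomial chosen for the requested precision.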
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent =
DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// LogofMantissa =
// -1.1609546f +
// (1.4034025f - 0.23903021f * x) * x;
//
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbe74c456, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f949a29, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
// -1.7417939f +
// (2.8212026f +
// (-1.4699568f +
// (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
//
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbd67b6d6, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fdef31a, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
// -2.1072184f +
// (4.2372794f +
// (-3.7029485f +
// (2.2781945f +
// (-0.87823314f +
// (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
//
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbc91e5ac, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4006dcab, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
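/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log2(x) = e + log2(m), where log2(m) is evaluated with a minimax
/// polynomial chosen for the requested precision.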
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
//
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbeb08fe0, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fd6633d, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
// -2.51285454f +
// (4.07009056f +
// (-2.12067489f +
// (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
//
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbda7262e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x4020d29c, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
// -3.0400495f +
// (6.1129976f +
// (-5.3420409f +
// (3.2865683f +
// (-1.2669343f +
// (0.27515199f -
// 0.25691327e-1f * x) * x) * x) * x) * x) * x;
//
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbcd2769e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4042902c, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
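/// In limited-precision mode, x is decomposed as m * 2^e with m in [1, 2), so
/// log10(x) = e * log10(2) + log10(m), where log10(m) is evaluated with a
/// minimax polynomial chosen for the requested precision.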
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log10ofMantissa =
// -0.50419619f +
// (0.60948995f - 0.10380950f * x) * x;
//
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbdd49a13, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f011300, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
// -0.64831180f +
// (0.91751397f +
// (-0.31664806f + 0.47637168e-1f * x) * x) * x;
//
// error 0.00019228036, which is better than 12 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3d431f31, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f25f7c3, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
// -0.84299375f +
// (1.5327582f +
// (-1.0688956f +
// (0.49102474f +
// (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
//
// error 0.0000037995730, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3c5d51ce, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3f57ce70, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// expandPow - Lower a pow intrinsic. Handles the special sequence for
/// limited-precision mode when the base is a constant exactly equal to 10.0f.
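/// In that case pow(10.0f, x) is rewritten as 2^(x * log2(10)) and lowered via
/// the exp2 polynomial expansion.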
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI,
SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
IsExp10 = LHSC->isExactlyValue(Ten);
}
}
// TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
  // If RHS is a constant, we can expand this out to a multiplication tree;
  // otherwise we end up lowering to a call to __powidf2 (for example). When
  // optimizing for size, we only do this if the expansion would produce a
  // small number of multiplies; when not optimizing for size, we always do
  // the full expansion.
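  // For example, powi(x, 11): countPopulation(11) + Log2_32(11) = 3 + 3 = 6,
  // so the expansion is used even at -Os, and the binary decomposition below
  // accumulates x, then x^3 = x * x^2, then x^11 = x^3 * x^8 while CurSquare
  // steps through x, x^2, x^4, x^8.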
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
// Get the exponent as a positive value.
unsigned Val = RHSC->getSExtValue();
if ((int)Val < 0) Val = -Val;
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
bool OptForSize = DAG.shouldOptForSize();
if (!OptForSize ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
else
Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
CurSquare, CurSquare);
Val >>= 1;
}
// If the original was negative, invert the result, producing 1/(x*x*x).
if (RHSC->getSExtValue() < 0)
Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
return Res;
}
}
// Otherwise, expand to a libcall.
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
// the way to operation legalization. If we end up there and we do not have
// the ability to widen the type (if VT*2 is not legal), we cannot expand the
// node.
// Coax the legalizer into expanding the node during type legalization instead
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
// We don't have to do this if Scale is 0; that can always be expanded, unless
// it's a saturating signed operation. Those can experience true integer
// division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
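  // For example, a saturating signed i32 division would be widened to i33
  // here, which the type legalizer then promotes to a legal width where the
  // early expansion can be performed safely.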
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
Opcode, VT, ScaleInt);
if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
EVT PromVT;
if (VT.isScalarInteger())
PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
else if (VT.isVector()) {
PromVT = VT.getVectorElementType();
PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
if (Saturating)
LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
// pairs.
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
SDValue Op = N.getOperand(1);
Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
Op.getValueType().getSizeInBits());
return;
}
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
getUnderlyingArgRegs(Regs, N.getOperand(0));
return;
case ISD::BUILD_PAIR:
case ISD::BUILD_VECTOR:
case ISD::CONCAT_VECTORS:
for (SDValue Op : N->op_values())
getUnderlyingArgRegs(Regs, Op);
return;
default:
return;
}
}
/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
/// We don't currently support this for variadic dbg_values, as they shouldn't
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
MIB.addReg(Reg, RegState::Debug);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
MIB.addMetadata(NewDIExpr);
return MIB;
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
}
};
if (!IsDbgDeclare) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
if (!IsInEntryBlock)
return false;
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic describes a
// variable that also is a param.
//
// Although, if we are at the top of the entry block already, we can still
// emit using ArgDbgValue. This might catch some situations when the
// dbg.value refers to an argument that isn't used in the entry block, so
// any CopyToReg node would be optimized out and the only way to express
// this DBG_VALUE is by using the physical reg (or FI) as done in this
    // method. In short, we should only emit as ArgDbgValue if the Variable is
    // an argument to the current function and the dbg.value intrinsic is found
    // in the entry block.
bool VariableIsFunctionInputArg = Variable->isParameter() &&
!DL->getInlinedAt();
bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
if (!IsInPrologue && !VariableIsFunctionInputArg)
return false;
    // Here we assume that a function argument at the IR level can only be used
    // to describe one input parameter at the source level. If we, for example,
    // have source code like this
//
// struct A { long x, y; };
// void foo(struct A a, long b) {
// ...
// b = a.x;
// ...
// }
//
// and IR like this
//
// define void @foo(i32 %a1, i32 %a2, i32 %b) {
// entry:
// call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %b, "b",
// ...
// call void @llvm.dbg.value(metadata i32 %a1, "b"
// ...
//
    // then the last dbg.value is describing a parameter "b" using a value that
    // is an argument. But since we have already used %a1 to describe a
    // parameter, we should not handle that last dbg.value here (that would
    // result in an incorrect hoisting of the DBG_VALUE to the function entry).
    // Notice that we allow one dbg.value per IR-level argument, to accommodate
    // the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
if (ArgNo >= FuncInfo.DescribedArgs.size())
FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
return false;
FuncInfo.DescribedArgs.set(ArgNo);
}
}
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
if (ArgRegsAndSizes.size() == 1)
Reg = ArgRegsAndSizes.front().first;
if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
IsIndirect = IsDbgDeclare;
}
}
if (!Op && N.getNode()) {
// Check if frame index is available.
SDValue LCandidate = peekThroughBitcasts(N);
if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
}
if (!Op) {
// Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
int RegFragmentSizeInBits = RegAndSize.second;
if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
// The register is entirely outside the expression fragment,
// so is irrelevant for debug info.
if (Offset >= ExprFragmentSizeInBits)
break;
// The register is partially outside the expression fragment, only
// the low bits within the fragment are relevant for debug info.
if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
}
}
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, RegFragmentSizeInBits);
Offset += RegAndSize.second;
// If a valid fragment expression cannot be created, the variable's
// correct value cannot be determined and so it is set as Undef.
if (!FragmentExpr) {
SDDbgValue *SDV = DAG.getConstantDbgValue(
Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
continue;
}
MachineInstr *NewMI =
MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
// Check if ValueMap has reg number.
DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
splitMultiRegDbgValue(ArgRegsAndSizes);
return true;
}
}
if (!Op)
return false;
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
if (Op->isReg())
NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
else
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
case Intrinsic::smul_fix_sat:
return ISD::SMULFIXSAT;
case Intrinsic::umul_fix_sat:
return ISD::UMULFIXSAT;
case Intrinsic::sdiv_fix:
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
case Intrinsic::sdiv_fix_sat:
return ISD::SDIVFIXSAT;
case Intrinsic::udiv_fix_sat:
return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
}
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
const char *FunctionName) {
assert(FunctionName && "FunctionName must not be nullptr");
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
assert(cast<CallBase>(PreallocatedSetup)
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
for (auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
return UseCall;
}
}
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I,
DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::READ_REGISTER, sdl,
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return;
}
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memcpy_inline: {
const auto &MCI = cast<MemCpyInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
MachinePointerInfo(I.getArgOperand(0)), AAInfo);
updateDAGForMaybeTailCall(MS);
return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
updateDAGForMaybeTailCall(MM);
return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
getRoot(), SrcValue);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::call_preallocated_arg: {
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = SrcValue;
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
MVT::i32); // arg index
SDValue Res = DAG.getNode(
ISD::PREALLOCATED_ARG, sdl,
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
    // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
    // they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
<< "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocationOp(0);
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (bad/undef/unused-arg address)\n");
return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
// Check if this variable can be described by a frame index, typically
// either as a static alloca or a byval parameter.
int FI = std::numeric_limits<int>::max();
if (const auto *AI =
dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
if (AI->isStaticAlloca()) {
auto I = FuncInfo.StaticAllocaMap.find(AI);
if (I != FuncInfo.StaticAllocaMap.end())
FI = I->second;
}
} else if (const auto *Arg = dyn_cast<Argument>(
Address->stripInBoundsConstantOffsets())) {
FI = FuncInfo.getArgumentFrameIndex(Arg);
}
// llvm.dbg.addr is control dependent and always generates indirect
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
if (Intrinsic == Intrinsic::dbg_addr) {
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
dl, SDNodeOrder);
DAG.AddDbgValue(SDV, isParameter);
} else {
LLVM_DEBUG(dbgs() << "Skipping " << DI
<< " (variable info stashed in MF side table)\n");
}
return;
}
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV =
DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
/*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
}
return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
DILabel *Label = DI.getLabel();
assert(Label && "Missing label");
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
SmallVector<Value *, 4> Values(DI.getValues());
if (Values.empty())
return;
if (std::count(Values.begin(), Values.end(), nullptr))
return;
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
addDanglingDebugInfo(&DI, dl, SDNodeOrder);
return;
}
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return;
}
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
return;
case Intrinsic::masked_store:
visitMaskedStore(I);
return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
setValue(&I,
expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
setValue(&I,
expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
setValue(&I,
expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::lround: Opcode = ISD::LROUND; break;
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::lrint: Opcode = ISD::LRINT; break;
case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
}
EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
getValue(I.getArgOperand(0))));
return;
}
case Intrinsic::minnum:
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::fma:
setValue(&I, DAG.getNode(
ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
}
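// For example, "call float @llvm.fmuladd.f32(float %a, float %b, float %c)"
// (illustrative IR) becomes a single ISD::FMA node when fusion is allowed and
// the target reports FMA as faster than separate multiply/add; otherwise it
// is split into the ISD::FMUL followed by ISD::FADD built above.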
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
case Intrinsic::fptosi_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::fptoui_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::set_rounding:
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
{getRoot(), getValue(I.getArgOperand(0))});
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
bool IsFSHL = Intrinsic == Intrinsic::fshl;
SDValue X = getValue(I.getArgOperand(0));
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
} else {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
return;
}
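// A funnel shift whose two data operands are the same value is a rotate, so
// e.g. "llvm.fshl.i32(i32 %x, i32 %x, i32 %z)" (illustrative IR) is emitted
// as ISD::ROTL, and the corresponding fshr form as ISD::ROTR, instead of a
// generic funnel-shift node.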
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sshl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ushl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
case Intrinsic::sdiv_fix:
case Intrinsic::udiv_fix:
case Intrinsic::sdiv_fix_sat:
case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::smax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::abs: {
// TODO: Preserve "int min is poison" arg in SDAG?
SDValue Op1 = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
return;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The result type for @llvm.get.dynamic.area.offset should match PtrTy for
// the target.
if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
return;
}
case Intrinsic::stackguard: {
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
Align Align = DL->getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
if (TLI.useStackGuardXorFP())
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
Src = getLoadStackGuard(DAG, sdl, Chain);
else
Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
Res = DAG.getStore(
Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::objectsize:
llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, noalias scope declarations, assumptions, and
// artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
MachineFunction &MF = DAG.getMachineFunction();
MCSymbol *Label =
MF.getMMI().getContext().createTempSymbol("annotation", true);
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
return;
}
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return;
case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
.getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::debugtrap:
DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::ubsantrap:
DAG.setRoot(DAG.getNode(
ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
DAG.getTargetConstant(
cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
MVT::i32)));
break;
default: llvm_unreachable("unknown trap intrinsic");
}
return;
}
TargetLowering::ArgListTy Args;
if (Intrinsic == Intrinsic::ubsantrap) {
Args.push_back(TargetLoweringBase::ArgListEntry());
Args[0].Val = I.getArgOperand(0);
Args[0].Node = getValue(Args[0].Val);
Args[0].Ty = Args[0].Val->getType();
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
ISD::NodeType Op;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
}
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
EVT ResultVT = Op1.getValueType();
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
*Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
return;
}
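// E.g. "llvm.uadd.with.overflow.i32" yields a two-result UADDO node with
// value types {i32, i1}; for vector operands the i1 overflow type is widened
// above to a vector of i1 with the same element count.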
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ None, Flags);
// Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled at -O0, so discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
const int64_t ObjectSize =
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
getUnderlyingObjects(ObjectPtr, Allocas);
for (const Value *Alloca : Allocas) {
const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
// Could not find an Alloca.
if (!LifetimeObject)
continue;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
return;
const int FrameIndex = SI->second;
int64_t Offset;
if (GetPointerBaseWithConstantOffset(
ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
Offset = -1; // Cannot determine offset from alloca to lifetime object.
Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
Offset);
DAG.setRoot(Res);
}
return;
}
case Intrinsic::pseudoprobe: {
auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
DAG.setRoot(Res);
return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
return;
case Intrinsic::invariant_end:
// Discard region information.
return;
case Intrinsic::clear_cache:
// FunctionName may be null.
if (const char *FunctionName = TLI.getClearCacheBuiltinName())
lowerCallToExternalSymbol(I, FunctionName);
return;
case Intrinsic::donothing:
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
// ignore
return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
AllocaInst *Slot = cast<AllocaInst>(Arg);
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
.addFrameIndex(FI);
}
return;
}
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal =
unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
EVT PtrVT = FPVal.getValueType();
// Create an MCSymbol for the label to avoid any target lowering
// that would make this PC-relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
}
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N =
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it had a
// specific calling convention; currently this is only supported for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
SDValue LogEntryVal = getValue(I.getArgOperand(0));
SDValue StrSizeVal = getValue(I.getArgOperand(1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
// We need to enforce the calling convention for the call site so that
// argument ordering is handled correctly and register allocation can see
// which registers may be clobbered and must be preserved across calls to
// the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
DL, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it had a
// specific calling convention; currently this is only supported for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
// It's unclear to me how manipulating the selection DAG here forces callers
// to provide arguments in registers instead of on the stack.
SDValue LogTypeId = getValue(I.getArgOperand(0));
SDValue LogEntryVal = getValue(I.getArgOperand(1));
SDValue StrSizeVal = getValue(I.getArgOperand(2));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogTypeId);
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
// We need to enforce the calling convention for the call site so that
// argument ordering is handled correctly and register allocation can see
// which registers may be clobbered and must be preserved across calls to
// the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return;
case Intrinsic::experimental_stepvector:
visitStepVector(I);
return;
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(1), Offset, DAG.getDataLayout()));
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
SDValue Target;
};
SmallVector<BranchFunnelTarget, 8> Targets;
for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
report_fatal_error("all llvm.icall.branch.funnel operands must refer "
"to the same GlobalValue");
SDValue Val = getValue(I.getArgOperand(Op + 1));
auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
GA->getGlobal(), getCurSDLoc(),
Val.getValueType(), GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
for (auto &T : Targets) {
Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
getCurSDLoc(), MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
return;
}
case Intrinsic::wasm_landingpad_index:
// The information this intrinsic contained has been transferred to
// MachineFunction in SelectionDAGISel::PrepareEHLandingPad, so we can
// safely delete it now.
return;
case Intrinsic::aarch64_settag:
case Intrinsic::aarch64_settag_zero: {
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
setValue(&I, Val);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0));
SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements();
SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) {
OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index);
OpsStepConstants.push_back(
DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
}
EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
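// In other words, for a fixed-width <N x i1> result the mask is computed as
// lane[i] = ((Index + i) ult TripCount), and the UADDO carry is used above
// to force lanes whose index computation wrapped to false.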
case Intrinsic::experimental_vector_insert: {
auto DL = getCurSDLoc();
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::experimental_vector_extract: {
auto DL = getCurSDLoc();
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
visitVectorReverse(I);
return;
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
}
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
SDValue Chain = DAG.getRoot();
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
if (FPI.isUnaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
} else if (FPI.isTernaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
Opers.push_back(getValue(FPI.getArgOperand(2)));
} else {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (EB) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
};
SDVTList VTs = DAG.getVTList(ValueVTs);
fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags.setNoFPExcept(true);
if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
Flags.copyFMF(*FPOp);
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
case Intrinsic::experimental_constrained_fmuladd: {
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
ValueVTs[0])) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
Opers.push_back(Mul.getValue(0));
Opers.push_back(getValue(FPI.getArgOperand(2)));
}
break;
}
}
// A few strict DAG nodes carry additional operands that are not
// set up by the default code above.
switch (Opcode) {
default: break;
case ISD::STRICT_FP_ROUND:
Opers.push_back(
DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
}
}
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
#define END_REGISTER_VP_INTRINSIC(...) break;
#include "llvm/IR/VPIntrinsics.def"
}
if (!ResOPC.hasValue())
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
return ResOPC.getValue();
}
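// The mapping above is generated from VPIntrinsics.def: each
// BEGIN_REGISTER_VP_INTRINSIC case label falls through to the ResOPC
// assignment emitted for the matching BEGIN_REGISTER_VP_SDNODE entry, so
// every VP intrinsic that registers an SDNode resolves to exactly one ISD
// opcode.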
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
auto EVLParamPos =
VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
// Request operands.
SmallVector<SDValue, 7> OpValues;
for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
setValue(&VPIntrin, Result);
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
const BasicBlock *EHPadBB,
MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
}
return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
}
SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB,
MCSymbol *BeginLabel) {
assert(BeginLabel && "BeginLabel should've been set");
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
// Inform MachineModuleInfo of range.
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
// Some platforms (e.g. wasm) use funclet-style IR but do not actually use
// outlined funclets or their LSDA info style.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(II && "II should've been set");
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
assert(EHPadBB);
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
return Chain;
}
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
CLI.setChain(getRoot());
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
// and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can be
// relying on us setting vregs for the exported values.
PendingExports.clear();
} else {
DAG.setRoot(Result.second);
}
if (EHPadBB) {
DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
BeginLabel));
}
return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
bool isMustTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true" && !isMustTailCall)
isTailCall = false;
// We can't tail call inside a function with a swifterror argument; lowering
// does not support this yet. The swifterror value would have to be moved
// into the swifterror register before the call.
if (TLI.supportSwiftError() &&
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
isTailCall = false;
}
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
continue;
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// Find the virtual register for the actual swifterror argument and use it
// in place of the Value.
Entry.Node =
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode;
Entry.Ty = V->getType();
Entry.IsCFGuardTarget = true;
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is a swifterror argument. Targets have not
// been updated to support tail calls in this case.
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
// Here we copy it to a virtual register and update SwiftErrorMap for
// book-keeping.
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
Register VReg =
SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
return Builder.getValue(LoadCst);
}
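// E.g. for a pointer into a string literal, the fold above can return the
// constant value directly, so no load node is emitted at all.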
// Otherwise, we have to emit the load. If the pointer refers to memory that
// is constant but cannot be folded, the input chain can be the entry node.
SDValue Root;
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = Builder.DAG.getRoot();
}
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal =
Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
return LoadVal;
}
/// Record the value for an instruction that produces an integer result,
/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
return false;
// If the target has a fast compare for the given size, it will return a
// preferred load type for that size. Require that the load VT is legal and
// that the target supports unaligned loads of that type. Otherwise, return
// INVALID.
auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LVT = TLI.hasFastEqualityCompare(NumBits);
if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
if (!TLI.isTypeLegal(LVT) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
return LVT;
};
// This turns into unaligned loads. We only do this if the target natively
// supports the MVT we'll be loading or if it is small enough (<= 4 bytes)
// that we'll only produce a small number of byte loads.
MVT LoadVT;
unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
switch (NumBitsToCompare) {
default:
return false;
case 16:
LoadVT = MVT::i16;
break;
case 32:
LoadVT = MVT::i32;
break;
case 64:
case 128:
case 256:
LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
break;
}
if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
// Bitcast to a wide integer type if the loads are vectors.
if (LoadVT.isVector()) {
EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
LoadL = DAG.getBitcast(CmpVT, LoadL);
LoadR = DAG.getBitcast(CmpVT, LoadR);
}
SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
processIntegerCallValue(I, Cmp, false);
return true;
}
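// With this fast path, a zero-equality comparison such as
// "memcmp(p, q, 4) == 0" (illustrative) lowers to one i32 load from each
// pointer plus a single SETNE, instead of a call to the library routine.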
/// See if we can lower a memchr call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Src), getValue(Char), getValue(Length),
MachinePointerInfo(Src));
if (Res.first.getNode()) {
setValue(&I, Res.first);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), AAInfo);
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
// Check if Size needs to be truncated or extended.
Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
// Adjust return pointer to point just past the last dst byte.
SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
Dst, Size);
setValue(&I, DstPlusSize);
return true;
}
/// See if we can lower a strcpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1), isStpcpy);
if (Res.first.getNode()) {
setValue(&I, Res.first);
DAG.setRoot(Res.second);
return true;
}
return false;
}
/// See if we can lower a strcmp call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a unary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it, otherwise return
/// false and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I,
DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
/// See if we can lower a binary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it. Otherwise return
/// false, and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (I.isInlineAsm()) {
visitInlineAsm(I);
return;
}
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
if (!IID)
if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
IID = II->getIntrinsicID(F);
if (IID) {
visitIntrinsicCall(I, IID);
return;
}
}
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if the call is marked
// nobuiltin or the call site requires strict floating-point semantics.
LibFunc Func;
if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
case LibFunc_bcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
// We already checked this call's prototype; verify it doesn't modify
// errno.
if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
break;
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
case LibFunc_fmax:
case LibFunc_fmaxf:
case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
case LibFunc_sqrt_finite:
case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
case LibFunc_floor:
case LibFunc_floorf:
case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
case LibFunc_ceil:
case LibFunc_ceilf:
case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
case LibFunc_rint:
case LibFunc_rintf:
case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
case LibFunc_round:
case LibFunc_roundf:
case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
case LibFunc_trunc:
case LibFunc_truncf:
case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
case LibFunc_exp2:
case LibFunc_exp2f:
case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
}
}
}
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber,
/// this is null; otherwise it is the incoming operand to the CallInst.
/// This gets modified as the asm is processed.
SDValue CallOperand;
/// AssignedRegs - If this is a register or register class operand, this
/// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
bool hasMemory(const TargetLowering &TLI) const {
// Indirect operand accesses access memory.
if (isIndirect)
return true;
for (const auto &Code : Codes)
if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
return true;
return false;
}
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL) const {
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
return TLI.getProgramPointerTy(DL);
llvm::Type *OpTy = CallOperandVal->getType();
// FIXME: code duplicated from TargetLowering::ParseConstraints().
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
case 8:
case 16:
case 32:
case 64:
case 128:
OpTy = IntegerType::get(Context, BitSize);
break;
}
}
return TLI.getAsmOperandValueType(DL, OpTy, true);
}
};
} // end anonymous namespace
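// Illustrative sketch of getCallOperandValEVT() above (hypothetical types):
// a struct such as { i32, i32 } is not a single value type but is 64 bits
// wide, so it is tiled as i64 for constraint purposes, while { <16 x i8> }
// is first unwrapped to the vector it contains.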
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &MatchingOpInfo,
SelectionDAG &DAG) {
if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
return;
const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const auto &TLI = DAG.getTargetLoweringInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
MatchingOpInfo.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
MatchingOpInfo.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
// FIXME: error out in a more elegant fashion
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
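// Hypothetical example of the matching-constraint case handled above: in
//   %r = call i32 asm "add $0, $1", "=r,0,r"(i32 %a, i32 %b)
// the "0" input is tied to the "=r" output, so both operands must end up
// with the same ConstraintVT and a compatible register class.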
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
SDISelAsmOperandInfo &OpInfo,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we don't have an indirect input, put it in the constant pool if we can,
// otherwise spill it to a stack slot.
// TODO: This isn't quite right. We need to handle these according to
// the addressing mode that the constraint wants. Also, this may take
// an additional register for the computation and we don't want that
// either.
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(
cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
return Chain;
}
// Otherwise, create a stack slot and emit a store to it before the asm.
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(
TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
}
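// Sketch of the helper above (illustrative values): a direct "m" input that
// is a ConstantFP such as double 1.0 is given a constant-pool address, while
// a plain SSA value is stored to a freshly created stack slot and the slot's
// frame index is used as the operand address.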
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
return;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
const TargetRegisterClass *RC;
std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
return;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
// If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
// If this is an input value, the bitcast to the new type is done now.
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
!TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
// refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
// If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
OpInfo.ConstraintVT = VT;
}
}
}
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
return;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
// Initialize NumRegs.
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other)
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now.
// If this is associated with a specific register, initialize the iterator to
// the correct place. If virtual, make sure we have enough registers.
// Initialize iterator if necessary
TargetRegisterClass::iterator I = RC->begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
// Do not check for single registers.
if (AssignedReg) {
for (; *I != AssignedReg; ++I)
assert(I != RC->end() && "AssignedReg should be member of RC");
}
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
}
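// Rough example restating the comments above with concrete values: on a
// 32-bit target an f64 operand constrained to integer registers is bitcast
// to i64 here and later expanded into two i32 registers, and an "{ax}"
// constraint used with an i32 value is remembered as i16-typed AX so the
// correct extension can be applied.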
static unsigned
findMatchingInlineAsmOperand(unsigned OperandNo,
const std::vector<SDValue> &AsmNodeOperands) {
// Scan until we find the definition we already emitted of this operand.
unsigned CurOp = InlineAsm::Op_FirstOperand;
for (; OperandNo; --OperandNo) {
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
assert((InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
InlineAsm::isMemKind(OpFlag)) &&
"Skipped past definitions?");
CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
}
return CurOp;
}
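// For orientation (informal summary): the INLINEASM node built below lays
// out its operands as
//   0: input chain, 1: asm string, 2: srcloc MDNode, 3: extra-info flags,
// followed by one flag word per constraint group and then that group's
// operands; the scan above walks those flag words to locate a matched
// definition.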
namespace {
class ExtraFlags {
unsigned Flags = 0;
public:
explicit ExtraFlags(const CallBase &Call) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
// for Other constraints as well.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
if (OpInfo.Type == InlineAsm::isInput)
Flags |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
Flags |= InlineAsm::Extra_MayStore;
else if (OpInfo.Type == InlineAsm::isClobber)
Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
unsigned get() const { return Flags; }
};
} // end anonymous namespace
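// Hedged example of the flag computation above: for a GNU asm statement like
//   asm volatile("" : : "m"(x));
// the constructor records Extra_HasSideEffects (volatile) and update() adds
// Extra_MayLoad because the "m" input is a memory constraint.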
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
unsigned NumMatchingOps = 0;
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
} else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
DAG.getDataLayout());
OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT = TLI.getSimpleValueType(
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
DAG.getDataLayout(), Call.getType()).getSimpleVT();
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
if (OpInfo.hasMatchingInput())
++NumMatchingOps;
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
if (T.ConstraintType == TargetLowering::C_Immediate &&
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic for constraints like "n" because
// inlining could cause an integer constant to show up.
return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
"' expects an integer constant "
"expression");
ExtraInfo.update(T);
}
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr || EmitEHLabels) {
// If this is a callbr or invoke we need to flush pending exports since
// inlineasm_br and invoke are terminators.
// We need to do this before nodes are glued to the inlineasm_br node.
Chain = getControlRoot();
}
MCSymbol *BeginLabel = nullptr;
if (EmitEHLabels) {
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value.
Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
// It is now an indirect operand.
OpInfo.isIndirect = true;
}
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass the (potentially null) srcloc MDNode in as the third operand.
const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Third pass: Loop over operands to prepare DAG-level operands. As part of
// this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// Assign Registers.
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
if (Register::isPhysicalRegister(Reg) &&
TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
const char *RegName = TRI.getName(Reg);
emitInlineAsmError(Call, "write to reserved register '" +
Twine(RegName) + "'");
return true;
}
}
return false;
};
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
Call, "couldn't allocate output register for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
"don't know how to handle tied "
"indirect register inputs");
return;
}
SmallVector<unsigned, 4> Regs;
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
- const TargetRegisterClass *RC = TiedReg.isVirtual() ?
- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// registers tied to the matched output.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
break;
}
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
// Treat indirect 'X' constraint as memory.
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
emitInlineAsmError(Call, "value out of range for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
emitInlineAsmError(Call,
"invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
Call, "Don't know how to handle indirect register inputs yet "
"for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(Call,
"couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
SDLoc dl = getCurSDLoc();
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
}
}
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
// Do additional work to generate outputs.
SmallVector<EVT, 1> ResultVTs;
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
assert(CurResultType != ResultTypes.end() && "Unexpected value");
assert((*CurResultType)->isSized() && "Unexpected unsized type");
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
++CurResultType;
// If the type of the inline asm call site return value differs from, but has
// the same size as, the type of the asm output, bitcast it. One example of this
// is for vectors with different width / number of elements. This can
// happen for register classes that can contain multiple different value
// types. The preg or vreg allocated may not have the same VT as was
// expected.
//
// This can also happen for a return value that disagrees with the register
// class it is put in, eg. a double in a general-purpose register on a
// 32-bit machine.
if (ResultVT != V.getValueType() &&
ResultVT.getSizeInBits() == V.getValueSizeInBits())
V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
V.getValueType().isInteger()) {
// If a result value was tied to an input value, the computed result
// may have a wider width than the expected result. Extract the
// relevant portion.
V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
}
assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
ResultVTs.push_back(ResultVT);
ResultValues.push_back(V);
};
// Deal with output operands.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
if (OpInfo.Type == InlineAsm::isOutput) {
SDValue Val;
// Skip trivial output operands.
if (OpInfo.AssignedRegs.Regs.empty())
continue;
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
break; // Already handled.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
// Indirect outputs manifest as stores. Record output chains.
if (OpInfo.isIndirect) {
const Value *Ptr = OpInfo.CallOperandVal;
assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
MachinePointerInfo(Ptr));
OutChains.push_back(Store);
} else {
// generate CopyFromRegs to associated registers.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
} else
handleRegAssign(Val);
}
}
}
// Set results.
if (!ResultValues.empty()) {
assert(CurResultType == ResultTypes.end() &&
"Mismatch in number of ResultTypes");
assert(ResultValues.size() == ResultTypes.size() &&
"Mismatch in number of output operands in asm result");
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
setValue(&Call, V);
}
// Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
if (EmitEHLabels) {
Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
}
// Only update the root if the inline assembly has a memory effect.
if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
EmitEHLabels)
DAG.setRoot(Chain);
}
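// End-to-end illustration (hypothetical IR): for
//   %v = call i32 asm "mov $1, $0", "=r,r"(i32 %x)
// the routine above emits an ISD::INLINEASM node, copies %x into the
// allocated input register, reads the "=r" output back with CopyFromReg,
// and binds the merged result to %v via setValue().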
void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
SmallVector<SDValue, 1> Ops;
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
V = DAG.getPtrExtOrTrunc(
V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
DAG.getSrcValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(1))));
}
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
if (!Range)
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
if (!Lo.isMinValue())
return Op;
APInt Hi = CR.getUnsignedMax();
unsigned Bits = std::max(Hi.getActiveBits(),
static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
SmallVector<SDValue, 4> Ops;
Ops.push_back(ZExt);
for (unsigned I = 1; I != NumVals; ++I)
Ops.push_back(Op.getValue(I));
return DAG.getMergeValues(Ops, SL);
}
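// Hypothetical example for the helper above: an instruction carrying
//   !range !{i64 0, i64 256}
// has an unsigned maximum of 255, so its value is wrapped in an
// ISD::AssertZext to i8, letting later combines drop redundant
// zero-extensions.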
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
/// alloca in the entry block, then the runtime may assume that the alloca's
/// StackMap location can be read immediately after compilation and that the
/// location is valid at any point during execution (this is similar to the
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
Ops.push_back(
Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(Builder.DAG.getTargetFrameIndex(
FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
}
/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
SDValue Chain, InFlag, Callee, NullPtr;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
// chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
MVT::i32));
// Push live variables for the stack map.
addStackMapLiveVars(CI, 2, DL, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
// Push the chain and the glue flag.
Ops.push_back(Chain);
Ops.push_back(InFlag);
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
Chain = SDValue(SM, 0);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
// Set the root to the target-lowered call chain.
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
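// Informal usage example for the lowering above:
//   call void @llvm.experimental.stackmap(i64 7, i32 4, i32* %p, i64 42)
// records ID 7 and four shadow bytes; %p becomes a stack-map live value and
// the constant 42 is encoded via StackMaps::ConstantOp rather than occupying
// a register.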
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
// i32 <numArgs>,
// [Args...],
// [live variables...])
CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
/*isTarget=*/true);
else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
SDLoc(SymbolicCallee),
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
/// Tail calls are not allowed.
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
// Add the callee.
Ops.push_back(Callee);
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
// Add the calling convention
Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
// Push the register mask info.
if (HasGlue)
Ops.push_back(*(Call->op_end()-2));
else
Ops.push_back(*(Call->op_end()-1));
// Push the chain (this is originally the first operand of the call, but
// now becomes the last or second-to-last operand).
Ops.push_back(*(Call->op_begin()));
// Push the glue flag (last operand).
if (HasGlue)
Ops.push_back(*(Call->op_end()-1));
SDVTList NodeTys;
if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
ValueVTs.push_back(MVT::Other);
ValueVTs.push_back(MVT::Glue);
NodeTys = DAG.getVTList(ValueVTs);
} else
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Replace the target specific call node with a PATCHPOINT node.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
setValue(&CB, SDValue(MN, 0));
else
setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
} else
DAG.ReplaceAllUsesWith(Call, MN);
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
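// Informal example for the patchpoint lowering above (hypothetical IR):
//   %r = call i64 (i64, i32, i8*, i32, ...)
//        @llvm.experimental.patchpoint.i64(i64 3, i32 12, i8* %target,
//                                          i32 2, i64 %a, i64 %b)
// reserves 12 bytes of encoding space, passes %a and %b according to the
// chosen calling convention (or leaves them to the register allocator under
// anyregcc), and records any trailing operands as stack-map live values.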
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
if (I.getNumArgOperands() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
case Intrinsic::vector_reduce_fadd:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_fmul:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
setValue(&I, Res);
}
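// Sketch of the fadd special case above (hypothetical IR): with reassoc set,
//   %s = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
// becomes FADD(%acc, VECREDUCE_FADD(%v)); without reassoc the ordered
// VECREDUCE_SEQ_FADD node is used instead.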
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
if (CLI.RetZExt)
Attrs.push_back(Attribute::ZExt);
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
SmallVector<EVT, 4> OldRetTys;
SmallVector<uint64_t, 4> OldOffsets;
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
CLI.IsVarArg, Outs, CLI.RetTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
// FIXME: equivalent assert?
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx =
MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsInReg = false;
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftAsync = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
ISD::ArgFlagsTy Flags;
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (I == RetTys.size() - 1)
Flags.setInConsecutiveRegsLast();
}
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetTy->isPointerTy()) {
MyFlags.Flags.setPointer();
MyFlags.Flags.setPointerAddrSpace(
cast<PointerType>(CLI.RetTy)->getAddressSpace());
}
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
MyFlags.Flags.setZExt();
if (CLI.IsInReg)
MyFlags.Flags.setInReg();
CLI.Ins.push_back(MyFlags);
}
}
}
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
MyFlags.Flags.setSwiftError();
CLI.Ins.push_back(MyFlags);
}
}
}
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Args[i].Ty)->getAddressSpace());
}
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
Flags.setSExt();
if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
isa<StructType>(FinalType)) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Args[i].IsSRet)
Flags.setSRet();
if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
if (Args[i].IsSwiftAsync)
Flags.setSwiftAsync();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
if (Args[i].IsCFGuardTarget)
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsByRef)
Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
Align MemAlign;
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
Flags.setByValSize(FrameSize);
// For ByVal, the alignment should come from the frontend; the backend will
// guess if this info is not there, but there are cases it cannot get right.
if (auto MA = Args[i].Alignment)
MemAlign = *MA;
else
MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
} else if (auto MA = Args[i].Alignment) {
MemAlign = *MA;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors that can be lowered,
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
assert((CLI.RetTy == Args[i].Ty ||
(CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
CLI.RetTy->getPointerAddressSpace() ==
Args[i].Ty->getPointerAddressSpace())) &&
RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
// cases it's safe to pass the argument register value unchanged as the
// return register value (although it's at the target's option whether
// to do so)
// TODO: allow code generation to take advantage of partially preserved
// registers rather than clobbering the entire register when the
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
(ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
CLI.RetZExt == Args[i].IsZExt))
Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
i < CLI.NumFixedArgs, i,
j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
if (NeedsRegBlock && Value == NumValues - 1)
CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
// Update CLI.InVals to use outside of this function.
CLI.InVals = InVals;
// Verify that the target's LowerCall behaved as expected.
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
if (CLI.IsTailCall) {
CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
#ifndef NDEBUG
for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() && "LowerCall emitted a null value!");
assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
}
#endif
SmallVector<SDValue, 4> ReturnValues;
if (!CanLowerReturn) {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
unsigned NumValues = RetTys.size();
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MachineFunction &MF = CLI.DAG.getMachineFunction();
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
Optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case; nothing
// will actually look at it.
if (ReturnValues.empty())
return std::make_pair(SDValue(), CLI.Chain);
}
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
}
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
// If the original node has one result, take the return value from
// LowerOperation as is. It might not be result number 0.
if (N->getNumValues() == 1) {
Results.push_back(Res);
return;
}
// If the original node has multiple results, then the return node should
// have the same number of results.
assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place the new result values based on N's result numbers.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
void
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
FuncInfo.PreferredExtendType.end())
? ISD::ANY_EXTEND
: FuncInfo.PreferredExtendType[V];
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
#include "llvm/CodeGen/SelectionDAGISel.h"
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
if (FastISel)
return A->use_empty();
const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
}
using ArgCopyElisionMapTy =
DenseMap<const Argument *,
std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
/// ArgCopyElisionCandidates.
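// As a hedged illustration (names are hypothetical, not taken from any test
// case), the pattern being searched for looks like:
//   define void @f(i32 %x) {
//   entry:
//     %x.addr = alloca i32
//     store i32 %x, i32* %x.addr
//     ...
// Here %x.addr is fully initialized by a single store of the argument %x, so
// the pair (%x, {%x.addr, store}) is recorded as an elision candidate.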
static void
findArgumentCopyElisionCandidates(const DataLayout &DL,
FunctionLoweringInfo *FuncInfo,
ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
// Record the state of every static alloca used in the entry block. Argument
// allocas are all used in the entry block, so we need approximately as many
// entries as we have arguments.
enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
unsigned NumArgs = FuncInfo->Fn->arg_size();
StaticAllocas.reserve(NumArgs * 2);
auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
if (!V)
return nullptr;
V = V->stripPointerCasts();
const auto *AI = dyn_cast<AllocaInst>(V);
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
return nullptr;
auto Iter = StaticAllocas.insert({AI, Unknown});
return &Iter.first->second;
};
// Look for stores of arguments to static allocas. Look through bitcasts and
// GEPs to handle type coercions, as long as the alloca is fully initialized
// by the store. Any non-store use of an alloca escapes it and any subsequent
// unanalyzed store might write it.
// FIXME: Handle structs initialized with multiple stores.
for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
// Look for stores, and handle non-store uses conservatively.
const auto *SI = dyn_cast<StoreInst>(&I);
if (!SI) {
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
// Ignore debug info and pseudo op intrinsics, they don't escape or store
// to allocas.
if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
for (const Use &U : I.operands()) {
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
*Info = StaticAllocaInfo::Clobbered;
}
continue;
}
// If the stored value is a static alloca, mark it as escaped.
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
*Info = StaticAllocaInfo::Clobbered;
// Check if the destination is a static alloca.
const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
if (!Info)
continue;
const AllocaInst *AI = cast<AllocaInst>(Dst);
// Skip allocas that have been initialized or clobbered.
if (*Info != StaticAllocaInfo::Unknown)
continue;
// Check if the stored value is an argument, and that this store fully
// initializes the alloca.
// If the argument type has padding bits we can't directly forward a pointer
// as the upper bits may contain garbage.
// Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
!DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
*Info = StaticAllocaInfo::Clobbered;
continue;
}
LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
<< '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
// Stop scanning if we've seen all arguments. This will happen early in -O0
// builds, which is useful, because -O0 builds have large entry blocks and
// many allocas.
if (ArgCopyElisionCandidates.size() == NumArgs)
break;
}
}
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
SDValue ArgVal, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
if (!FINode)
return;
// Check that the fixed stack object is the right size and alignment.
// Look at the alignment that the user wrote on the alloca instead of looking
// at the stack object.
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
"object size\n");
return;
}
Align RequiredAlignment = AI->getAlign();
if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
<< DebugStr(RequiredAlignment) << " vs "
<< DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
});
MFI.RemoveStackObject(OldIndex);
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
ElidedArgCopyInstrs.insert(SI);
// Check for uses of the argument again so that we can avoid exporting ArgVal
// if it isn't used by anything other than the store.
for (const Value *U : Arg.users()) {
if (U != SI) {
ArgHasUses = true;
break;
}
}
}
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// In Naked functions we aren't going to save any registers.
if (F.hasFnAttribute(Attribute::Naked))
return;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: We assume that a pointer will never break down into more than one VT
// or more than one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
// Look for stores of arguments to static allocas. Mark such arguments with a
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
unsigned ArgNo = Arg.getArgNo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Arg.getType())->getAddressSpace());
}
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
isa<StructType>(Arg.getType())) {
// The first value of a structure is marked as the start of the HVA.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
if (Arg.hasAttribute(Attribute::SwiftAsync))
Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
if (Arg.hasAttribute(Attribute::ByRef))
Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.hasAttribute(Attribute::Preallocated)) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(
TLI->getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
Align MemAlign;
Type *ArgMemTy = nullptr;
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
Flags.isByRef()) {
if (!ArgMemTy)
ArgMemTy = Arg.getPointeeInMemoryValueType();
uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
// For in-memory arguments, the size and alignment should be passed from the
// frontend. The backend will guess if this info is missing, but there are
// cases it cannot get right.
if (auto ParamAlign = Arg.getParamStackAlign())
MemAlign = *ParamAlign;
else if ((ParamAlign = Arg.getParamAlign()))
MemAlign = *ParamAlign;
else
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
if (Flags.isByRef())
Flags.setByRefSize(MemSize);
else
Flags.setByValSize(MemSize);
} else if (auto ParamAlign = Arg.getParamStackAlign()) {
MemAlign = *ParamAlign;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
if (Arg.hasAttribute(Attribute::Returned))
Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize().getKnownMinSize();
}
}
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
SDValue NewRoot = TLI->LowerFormalArguments(
DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
LLVM_DEBUG({
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
// Set up the argument values.
unsigned i = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Register SRetReg =
RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
++i;
}
SmallVector<SDValue, 4> Chains;
DenseMap<int, int> ArgCopyElisionFrameIndexMap;
for (const Argument &Arg : F.args()) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
continue;
bool ArgHasUses = !Arg.use_empty();
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
// If this argument is unused, remember its value anyway; it is used to
// generate debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
Arg.hasAttribute(Attribute::SwiftError);
if (!ArgHasUses && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
// Even an apparently 'unused' swifterror argument needs to be returned, so
// we generate a copy for it that can be used on return from the function.
if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
F.getCallingConv(), AssertOp));
}
i += NumParts;
}
// We don't need to do anything else for unused arguments.
if (ArgValues.empty())
continue;
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
// We want to associate the argument with the frame index, among the
// involved operands, that corresponds to the lowest address. The
// getCopyFromParts function, called earlier, is swapping the order of
// the operands to BUILD_PAIR depending on endianness. The result of
// that swapping is that the least significant bits of the argument will
// be in the first operand of the BUILD_PAIR node, and the most
// significant bits will be in the second operand.
unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Analyses past this point are naive and don't expect an assertion.
if (Res.getOpcode() == ISD::AssertZext)
Res = Res.getOperand(0);
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(&Arg);
SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
if (!Chains.empty()) {
Chains.push_back(NewRoot);
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
DAG.setRoot(NewRoot);
assert(i == InVals.size() && "Argument register count mismatch!");
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
if (I != ArgCopyElisionFrameIndexMap.end())
VI.Slot = I->second;
}
}
// Finally, if the target has anything special to do, allow it to do so.
emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBBs for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
if (!SuccsHandled.insert(SuccMBB).second)
continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead PHI nodes.
if (PN.use_empty())
continue;
// Skip empty types
if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
}
}
}
ConstantsOut.clear();
}
/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
/// if SuccMBB is null.
MachineBasicBlock *
SelectionDAGBuilder::StackProtectorDescriptor::
AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
bool IsLikely,
MachineBasicBlock *SuccMBB) {
// If SuccBB has not been created yet, create it.
if (!SuccMBB) {
MachineFunction *MF = ParentMBB->getParent();
MachineFunction::iterator BBI(ParentMBB);
SuccMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(++BBI, SuccMBB);
}
// Add it as a successor of ParentMBB.
ParentMBB->addSuccessor(
SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
return SuccMBB;
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
/// Those will become new roots of the current DAG, but complications arise
/// when they are tail calls. In such cases, the call lowering will update
/// the root, but the builder still needs to know that a tail call has been
/// lowered in order to avoid generating an additional return.
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
// If the node is null, we do have a tail call.
if (MaybeTC.getNode() != nullptr)
DAG.setRoot(MaybeTC);
else
HasTailCall = true;
}
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (Size == 2 && W.MBB == SwitchMBB) {
// If any two of the cases have the same destination, and if one value
// is the same as the other except for a single bit that is unset in one and
// set in the other,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
// TODO: This could be extended to merge any 2 cases in switches with 3
// cases.
// TODO: Handle cases where W.CaseBB != SwitchBB.
CaseCluster &Small = *W.FirstCluster;
CaseCluster &Big = *W.LastCluster;
if (Small.Low == Small.High && Big.Low == Big.High &&
Small.MBB == Big.MBB) {
const APInt &SmallValue = Small.Low->getValue();
const APInt &BigValue = Big.Low->getValue();
// Check that there is only one bit different.
APInt CommonBit = BigValue ^ SmallValue;
if (CommonBit.isPowerOf2()) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
SDValue Cond = DAG.getSetCC(
DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
ISD::SETEQ);
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the
// probabilities.
addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
if (BPI)
addSuccessorWithProb(
SwitchMBB, DefaultMBB,
// The default destination is the first successor in IR.
BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
else
addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(Small.MBB));
// Insert the false branch.
BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
DAG.getBasicBlock(DefaultMBB));
DAG.setRoot(BrCond);
return;
}
}
}
if (TM.getOptLevel() != CodeGenOpt::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
llvm::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
// without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
break;
}
}
}
// Compute total probability.
BranchProbability DefaultProb = W.DefaultProb;
BranchProbability UnhandledProbs = DefaultProb;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
FallthroughUnreachable = isa<UnreachableInst>(
DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
auto JumpProb = I->Prob;
auto FallthroughProb = UnhandledProbs;
// If the default statement is a target of the jump table, we evenly
// distribute the default probability to successors of CurMBB. Also
// update the probability on the edge from JumpMBB to Fallthrough.
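// Illustrative numbers (hypothetical): if DefaultProb is 20%, the jump edge
// gains 10%, the fallthrough edge loses 10%, and the edge from JumpMBB to
// DefaultMBB is set to 10%.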
for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
SE = JumpMBB->succ_end();
SI != SE; ++SI) {
if (*SI == DefaultMBB) {
JumpProb += DefaultProb / 2;
FallthroughProb -= DefaultProb / 2;
JumpMBB->setSuccProbability(SI, DefaultProb / 2);
JumpMBB->normalizeSuccProbs();
break;
}
}
if (FallthroughUnreachable) {
// Skip the range check if the fallthrough block is unreachable.
JTH->OmitRangeCheck = true;
}
if (!JTH->OmitRangeCheck)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
// The jump table header will be inserted into our current block; it will do
// the range check and fall through to our fallthrough block.
JTH->HeaderBB = CurMBB;
JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
// If we're in the right place, emit the jump table header right now.
if (CurMBB == SwitchMBB) {
visitJumpTableHeader(*JT, *JTH, SwitchMBB);
JTH->Emitted = true;
}
break;
}
case CC_BitTests: {
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
CurMF->insert(BBI, BTC.ThisBB);
// Fill in fields of the BitTestBlock.
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
BTB->DefaultProb = UnhandledProbs;
// If the cases in the bit test don't form a contiguous range, we evenly
// distribute the probability of the edge to Fallthrough between the two
// successors of CurMBB.
if (!BTB->ContiguousRange) {
BTB->Prob += DefaultProb / 2;
BTB->DefaultProb -= DefaultProb / 2;
}
if (FallthroughUnreachable) {
// Skip the range check if the fallthrough block is unreachable.
BTB->OmitRangeCheck = true;
}
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
break;
}
case CC_Range: {
const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->Low == I->High) {
// Check Cond == I->Low.
CC = ISD::SETEQ;
LHS = Cond;
RHS=I->Low;
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
CC = ISD::SETLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
}
// If Fallthrough is unreachable, fold away the comparison.
if (FallthroughUnreachable)
CC = ISD::SETTRUE;
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
break;
}
}
CurMBB = Fallthrough;
}
}
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
if (X.Prob != CC.Prob)
return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
});
}
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const SwitchWorkListItem &W,
Value *Cond,
MachineBasicBlock *SwitchMBB) {
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
// Balance the tree based on branch probabilities to create a near-optimal (in
// terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the probability on both
// sides. If LeftProb and RightProb are equal, alternate which side is
// taken to ensure 0-probability nodes are distributed evenly.
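// Worked example (illustrative only, assuming a zero default probability):
// with cluster probabilities {0.1, 0.4, 0.2, 0.3}, the loop below first grows
// the left side (0.1 < 0.3) to {0.1, 0.4} = 0.5, then grows the right side
// (0.5 >= 0.3) to {0.2, 0.3} = 0.5, and stops with a 0.5 / 0.5 split.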
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
LeftProb += (++LastLeft)->Prob;
else
RightProb += (--FirstRight)->Prob;
I++;
}
while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
// efficient. We compensate for this here.
unsigned NumLeft = LastLeft - W.FirstCluster + 1;
unsigned NumRight = W.LastCluster - FirstRight + 1;
if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
// If one side has less than 3 clusters, and the other has more than 3,
// consider taking a cluster from the other side.
if (NumLeft < NumRight) {
// Consider moving the first cluster on the right to the left side.
CaseCluster &CC = *FirstRight;
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
if (LeftSideRank <= RightSideRank) {
// Moving the cluster to the left does not demote it.
++LastLeft;
++FirstRight;
continue;
}
} else {
assert(NumRight < NumLeft);
// Consider moving the last element on the left to the right side.
CaseCluster &CC = *LastLeft;
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
if (RightSideRank <= LeftSideRank) {
// Moving the cluster to the right does not demote it.
--LastLeft;
--FirstRight;
continue;
}
}
}
break;
}
assert(LastLeft + 1 == FirstRight);
assert(LastLeft >= W.FirstCluster);
assert(FirstRight <= W.LastCluster);
// Use the first element on the right as pivot since we will make less-than
// comparisons against it.
CaseClusterIt PivotCluster = FirstRight;
assert(PivotCluster > W.FirstCluster);
assert(PivotCluster <= W.LastCluster);
CaseClusterIt FirstLeft = W.FirstCluster;
CaseClusterIt LastRight = W.LastCluster;
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
// we can branch to its destination directly if it's squeezed exactly in
// between the known lower bound and Pivot - 1.
MachineBasicBlock *LeftMBB;
if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
FirstLeft->Low == W.GE &&
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
LeftMBB = FirstLeft->MBB;
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
// single cluster, RHS.Low == Pivot, and we can branch to its destination
// directly if RHS.High equals the current upper bound.
MachineBasicBlock *RightMBB;
if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
RightMBB = FirstRight->MBB;
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
}
// Scale CaseProb after peeling a case with probability PeeledCaseProb from
// the switch statement.
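// Worked example (illustrative only): if the peeled case carried 60% of the
// probability, the remaining switch carries 40%, so a case that originally
// had probability 0.2 is rescaled to roughly 0.2 / 0.4 = 0.5 within the
// peeled-off switch; the std::max below merely clamps the result to <= 1.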
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
BranchProbability PeeledCaseProb) {
if (PeeledCaseProb == BranchProbability::getOne())
return BranchProbability::getZero();
BranchProbability SwitchProb = PeeledCaseProb.getCompl();
uint32_t Numerator = CaseProb.getNumerator();
uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
return BranchProbability(Numerator, std::max(Numerator, Denominator));
}
// Try to peel the top probability case if it exceeds the threshold.
// Return current MachineBasicBlock for the switch statement if the peeling
// does not occur.
// If the peeling is performed, return the newly created MachineBasicBlock
// for the peeled switch statement. Also update Clusters to remove the peeled
// case. PeeledCaseProb is the BranchProbability for the peeled case.
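// Illustrative example (numbers are hypothetical): if one case carries 80%
// of the probability and that exceeds the peeling threshold, it is lowered
// as its own single-cluster work item ahead of the rest of the switch, and
// the remaining clusters are rescaled against the leftover 20% via
// scaleCaseProbality().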
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
const SwitchInst &SI, CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't peel if there is only one cluster or when optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
unsigned PeeledCaseIndex = 0;
bool SwitchPeeled = false;
for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
CaseCluster &CC = Clusters[Index];
if (CC.Prob < TopCaseProb)
continue;
TopCaseProb = CC.Prob;
PeeledCaseIndex = Index;
SwitchPeeled = true;
}
if (!SwitchPeeled)
return SwitchMBB;
LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
<< TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
++BBI;
MachineBasicBlock *PeeledSwitchMBB =
FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
ExportFromCurrentBlock(SI.getCondition());
auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
nullptr, nullptr, TopCaseProb.getCompl()};
lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
LLVM_DEBUG(
dbgs() << "Scale the probability for one cluster, before scaling: "
<< CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
CaseClusterVector Clusters;
Clusters.reserve(SI.getNumCases());
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
BranchProbability Prob =
BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
: BranchProbability(1, SI.getNumCases() + 1);
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
// Cluster adjacent cases with the same destination. We do this at all
// optimization levels because it's cheap to do and will make codegen faster
// if there are many clusters.
sortAndRangeify(Clusters);
// The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
}
return;
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
if (C.Kind == CC_JumpTable)
dbgs() << "JT:";
if (C.Kind == CC_BitTests)
dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
dbgs() << '-';
C.High->getValue().print(dbgs(), true);
}
dbgs() << ' ';
}
dbgs() << '\n';
});
assert(!Clusters.empty());
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
// Scale the branch probability for DefaultMBB if the peel occurs and
// DefaultMBB is not replaced.
if (PeeledCaseProb != BranchProbability::getZero() &&
DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
WorkList.push_back(
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
}
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto DL = getCurSDLoc();
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getStepVector(DL, ResultVT));
}
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V = getValue(I.getOperand(0));
assert(VT == V.getValueType() && "Malformed vector.reverse!");
if (VT.isScalableVector()) {
setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
return;
}
// Use VECTOR_SHUFFLE for the fixed-length vector
// to maintain existing behavior.
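// For example (illustrative), a 4-element fixed-length vector gets the
// reversal mask <3, 2, 1, 0>.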
SmallVector<int, 8> Mask;
unsigned NumElts = VT.getVectorMinNumElements();
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(NumElts - 1 - i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Op = getValue(I.getOperand(0));
for (unsigned i = 0; i != NumValues; ++i)
Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
SDValue(Op.getNode(), Op.getResNo() + i));
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V1 = getValue(I.getOperand(0));
SDValue V2 = getValue(I.getOperand(1));
int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
if (VT.isScalableVector()) {
MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, IdxVT)));
return;
}
unsigned NumElts = VT.getVectorNumElements();
if ((-Imm > NumElts) || (Imm >= NumElts)) {
// Result is undefined if immediate is out-of-bounds.
setValue(&I, DAG.getUNDEF(VT));
return;
}
uint64_t Idx = (NumElts + Imm) % NumElts;
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
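// Worked example (illustrative): with NumElts = 4 and Imm = 1, Idx = 1 and
// the shuffle mask is <1, 2, 3, 4>, i.e. the last three elements of V1
// followed by the first element of V2; with Imm = -1, Idx = 3 and the mask
// <3, 4, 5, 6> selects the last element of V1 followed by the first three
// elements of V2.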
SmallVector<int, 8> Mask;
for (unsigned i = 0; i < NumElts; ++i)
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5e1786958b6f..7f80ce37e28a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1,8961 +1,8964 @@
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
: TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
// First, check if tail calls have been disabled in this function.
if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// Conservatively require the attributes of the call to match those of
// the return. Ignore the following attributes because they don't affect the
// call sequence.
AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
Attribute::NonNull})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
if (CallerAttrs.contains(Attribute::ZExt) ||
CallerAttrs.contains(Attribute::SExt))
return false;
// Check if the only use is a function return node.
return isUsedByReturnOnly(Node, Chain);
}
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<SDValue> &OutVals) const {
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
// Check that we pass through the same value that the caller received in
// this register. (We look for a CopyFromReg reading a virtual register that
// is used for the function live-in value of register Reg.)
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
return true;
}
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamStackAlign(ArgIdx);
IndirectType = nullptr;
assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
"multiple ABI attributes?");
if (IsByVal) {
IndirectType = Call->getParamByValType(ArgIdx);
if (!Alignment)
Alignment = Call->getParamAlign(ArgIdx);
}
if (IsPreallocated)
IndirectType = Call->getParamPreallocatedType(ArgIdx);
if (IsInAlloca)
IndirectType = Call->getParamInAllocaType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
const SDLoc &dl,
SDValue InChain) const {
if (!InChain)
InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0; i < Ops.size(); ++i) {
SDValue NewOp = Ops[i];
Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
CallOptions.IsSExt);
Entry.IsZExt = !Entry.IsSExt;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
Entry.IsSExt = Entry.IsZExt = false;
}
Args.push_back(Entry);
}
if (LC == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
bool zeroExtend = !signExtend;
if (CallOptions.IsSoften &&
!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
signExtend = zeroExtend = false;
}
CLI.setDebugLoc(dl)
.setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater than or
// equal to DstAlign (or zero).
VT = MVT::i64;
if (Op.isFixedDstAlign())
while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
!allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
// If the type we've chosen is larger than the largest legal integer type
// then use that instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
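// Illustrative walk-through (assumptions: VT ended up as i32, overlapping
// accesses are not used, and i16/i8 are safe mem-op types): a 15-byte
// operation is broken into {i32, i32, i32, i16, i8}; when overlapping
// accesses are allowed and fast, the tail may instead become one unaligned
// i32 that overlaps the previous access.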
unsigned NumMemOps = 0;
uint64_t Size = Op.size();
while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector loads/stores for the left-over pieces.
EVT NewVT = VT;
unsigned NewVTSize;
bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
isSafeMemOpType(NewVT.getSimpleVT()))
Found = true;
else if (NewVT == MVT::i64 &&
isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
isSafeMemOpType(MVT::f64)) {
// i64 is usually not legal on 32-bit targets, but f64 may be.
NewVT = MVT::f64;
Found = true;
}
}
if (!Found) {
do {
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
if (NewVT == MVT::i8)
break;
} while (!isSafeMemOpType(NewVT.getSimpleVT()));
}
NewVTSize = NewVT.getSizeInBits() / 8;
// If the new VT cannot cover all of the remaining bits, then consider
// issuing one (or a pair of) unaligned and overlapping loads / stores.
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
VT = NewVT;
VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
MemOps.push_back(VT);
Size -= VTSize;
}
return true;
}
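// For illustration: a 15-byte memcpy on a target whose preferred type resolves
// to i64 and which disallows overlapping accesses typically yields
// MemOps = { i64, i32, i16, i8 } (8 + 4 + 2 + 1 bytes). When Op.allowOverlap()
// holds and misaligned i64 accesses are fast, the 7-byte tail is instead
// covered by a second, overlapping i64 operation.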
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
SDValue Chain;
return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
OldRHS, Chain);
}
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS,
SDValue &Chain,
bool IsSignaling) const {
// FIXME: Currently we cannot really respect all IEEE predicates because libgcc
// does not provide the required routines. We can update this code when libgcc
// provides such functions.
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
// Expand into one or more soft-fp libcall(s).
RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
bool ShouldInvertCC = false;
switch (CCCode) {
case ISD::SETEQ:
case ISD::SETOEQ:
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
case ISD::SETNE:
case ISD::SETUNE:
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
(VT == MVT::f64) ? RTLIB::UNE_F64 :
(VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
break;
case ISD::SETGE:
case ISD::SETOGE:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETLT:
case ISD::SETOLT:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
case ISD::SETLE:
case ISD::SETOLE:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETGT:
case ISD::SETOGT:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETO:
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
case ISD::SETONE:
// SETONE = O && UNE
ShouldInvertCC = true;
LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
default:
// Invert CC for unordered comparisons.
ShouldInvertCC = true;
switch (CCCode) {
case ISD::SETULT:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
(VT == MVT::f64) ? RTLIB::OGE_F64 :
(VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETULE:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
(VT == MVT::f64) ? RTLIB::OLE_F64 :
(VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETUGE:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
(VT == MVT::f64) ? RTLIB::OLT_F64 :
(VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
// Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
if (ShouldInvertCC) {
assert(RetVT.isInteger());
CCCode = getSetCCInverse(CCCode, RetVT);
}
if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
// Update Chain.
Chain = Call.second;
} else {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
if (ShouldInvertCC)
CCCode = getSetCCInverse(CCCode, RetVT);
NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
Call2.second);
NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
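// For illustration: softening (setcc f32 a, b, setuo) emits a single call to
// the RTLIB::UO_F32 routine (__unordsf2 in libgcc/compiler-rt) and tests its
// result against 0 with that libcall's condition code. A two-call case such as
// SETUEQ becomes "__unordsf2(a, b) != 0 || __eqsf2(a, b) == 0"; when the
// condition was inverted (e.g. SETONE) the two results are combined with AND
// instead of OR.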
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (!isPositionIndependent())
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
unsigned JTEncoding = getJumpTableEncoding();
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
return Table;
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,MCContext &Ctx) const{
// The normal PIC reloc base is the label at the start of the jump table.
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO, we will have to load it from
// a GOT and then add the offset.
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return false;
// If the code is position independent we will have to add a base register.
if (isPositionIndependent())
return false;
// Otherwise we can do it.
return true;
}
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
switch (Opcode) {
default:
break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Op1C || Op1C->isOpaque())
return false;
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
return false;
if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
break;
}
}
return false;
}
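// For illustration: given (and X, 0xFF00) where only the bits 0xF000 are
// demanded, the constant is not a subset of the demanded bits, so it is shrunk
// to 0xF000 and the node is rebuilt as (and X, 0xF000).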
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ANY_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &Demanded,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
if (Op.getValueType().isVector())
return false;
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
return false;
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned DemandedSize = Demanded.getActiveBits();
unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
return TLO.CombineTo(Op, Z);
}
}
return false;
}
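// For illustration: for a 64-bit (add X, Y) of which only the low 13 bits are
// demanded, the loop tries SmallVTBits = 16 and then 32; if truncating i64 to
// i16 and zero-extending i16 back to i64 are both free, the node is rebuilt as
// (any_extend (add (trunc X), (trunc Y))) in i16.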
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
KnownBits Known;
bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
if (VT.isScalableVector()) {
// Pretend we don't know anything for now.
Known = KnownBits(DemandedBits.getBitWidth());
return false;
}
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
}
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return SDValue();
// Ignore UNDEFs.
if (Op.isUndef())
return SDValue();
// Not demanding any bits/elts from Op.
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (SrcVT == DstVT)
return Src;
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
// TODO - bigendian once we have test coverage.
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
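// For illustration, assuming a little-endian v4i32 -> v2i64 bitcast: Scale is
// 2, and demanding only the low byte of i64 element 0 translates to demanding
// the low byte of i32 element 0 (chunk i = 0 of dst element j = 0).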
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
// TODO - bigendian once we have test coverage.
if ((NumSrcEltBits % NumDstEltBits) == 0 &&
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
}
break;
}
case ISD::AND: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return Op.getOperand(1);
break;
}
case ISD::OR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::XOR: {
LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If all of the demanded bits are known zero on one side, return the
// other.
if (DemandedBits.isSubsetOf(RHSKnown.Zero))
return Op.getOperand(0);
if (DemandedBits.isSubsetOf(LHSKnown.Zero))
return Op.getOperand(1);
break;
}
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
break;
}
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return Op0;
}
break;
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
if (DemandedBits.getActiveBits() <= ExBits)
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
if (NumSignBits >= (BitWidth - ExBits + 1))
return Op0;
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
// If we only want the lowest element and none of the extended bits, then we
// can return the bitcasted source vector.
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DAG.getDataLayout().isLittleEndian() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
// If we don't demand the inserted element, return the base vector.
SDValue Vec = Op.getOperand(0);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
!DemandedElts[CIdx->getZExtValue()])
return Vec;
break;
}
case ISD::INSERT_SUBVECTOR: {
// If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
return Vec;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If all the demanded elts are from one operand and are inline,
// then we can use the operand directly.
bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
AllUndef = false;
IdentityLHS &= (M == (int)i);
IdentityRHS &= ((M - NumElts) == i);
}
if (AllUndef)
return DAG.getUNDEF(Op.getValueType());
if (IdentityLHS)
return Op.getOperand(0);
if (IdentityRHS)
return Op.getOperand(1);
break;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth))
return V;
break;
}
return SDValue();
}
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth, bool AssumeSingleUse) const {
unsigned BitWidth = OriginalDemandedBits.getBitWidth();
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
// Don't know anything.
Known = KnownBits(BitWidth);
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
if (Op.getValueType().isScalableVector())
return false;
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
APInt DemandedElts = OriginalDemandedElts;
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
// Undef operand.
if (Op.isUndef())
return false;
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
return false;
}
if (Op.getOpcode() == ISD::ConstantFP) {
// We know all of the bits for a floating point constant!
Known = KnownBits::makeConstant(
cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
// Other users may use these bits.
EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, just compute the Known bits to
// simplify things downstream.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
DemandedBits = APInt::getAllOnesValue(BitWidth);
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth >= SelectionDAG::MaxRecursionDepth) {
// Limit search depth.
return false;
}
KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
// Upper elements are undef, so only get the knownbits if we just demand
// the bottom element.
if (DemandedElts == 1)
Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded element.
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
}
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
// If this is a ZEXTLoad and we are looking at the loaded value.
EVT MemVT = LD->getMemoryVT();
unsigned MemBits = MemVT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
return false; // Don't fall through, will infinitely loop.
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(DemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
Depth + 1))
return true;
if (!!DemandedVecElts)
Known = KnownBits::commonBits(Known, KnownVec);
return false;
}
case ISD::INSERT_SUBVECTOR: {
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, KnownSub);
if (!!DemandedSrcElts)
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSub || NewSrc) {
NewSub = NewSub ? NewSub : Sub;
NewSrc = NewSrc ? NewSrc : Src;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
APInt DemandedSubElts =
DemandedElts.extractBits(NumSubElts, i * NumSubElts);
if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
Known = KnownBits::commonBits(Known, Known2);
}
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from shuffle operands.
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i])
continue;
int M = ShuffleMask[i];
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
}
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
if (!!DemandedLHS || !!DemandedRHS) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
Known = KnownBits::commonBits(Known, Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS; here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
// Do not increment Depth here; that can cause an infinite loop.
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & DemandedBits) ==
(~RHSC->getAPIntValue() & DemandedBits))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
// constant, but if this 'and' is only clearing bits that were just set by
// the xor, then this 'and' can be eliminated by shrinking the mask of
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
return TLO.CombineTo(Op, Xor);
}
}
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op1);
// If all of the demanded bits in the inputs are known zeros, return zero.
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known &= Known2;
break;
}
case ISD::OR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
Known |= Known2;
break;
}
case ISD::XOR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op0);
if (DemandedBits.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other,
// turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
// If one side is a constant, and all of the set bits in the constant are
// also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
if (C->getAPIntValue() == Known2.One) {
SDValue ANDC =
TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
}
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
if (!C->isAllOnesValue() &&
DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
}
// If we can't turn this into a 'not', try to shrink the constant.
if (!C || !C->isAllOnesValue())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
Known ^= Known2;
break;
}
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// If (1) we only need the sign-bit, (2) the setcc operands are the same
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
// if we don't care about FP signed-zero. The use of SETLT with FP means
// that we don't care about NaNs.
if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
(isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
return TLO.CombineTo(Op, Op0);
// TODO: Should we check for other forms of sign-bit comparisons?
// Examples: X <= -1, X >= 0
}
if (getBooleanContents(Op0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
// TODO - support non-uniform vector amounts.
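// For illustration: ((X >>u 4) << 6) with the low 6 bits undemanded becomes
// (X << 2), while ((X >>u 6) << 4) with the low 4 bits undemanded becomes
// (X >>u 2).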
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
// Convert (shl (anyext x), c) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
ShTy = InnerVT;
SDValue NarrowShl =
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
// TODO - support non-uniform vector amounts.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
}
}
}
}
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// Low bits known zero.
Known.Zero.setLowBits(ShAmt);
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
}
// If we are only demanding sign bits then we can use the shift source
// directly.
if (const APInt *MaxSA =
TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (const APInt *SA2 =
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
int Diff = ShAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
}
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
// If we only want bits that already match the signbit then we don't need
// to shift.
unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
DemandedBits.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
}
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
return TLO.CombineTo(Op, NewOp);
}
}
}
break;
}
case ISD::FSHL:
case ISD::FSHR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
unsigned Amt = SA->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
// For fshr, 0-shift returns the 2nd arg.
if (Amt == 0) {
if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
Known, TLO, Depth + 1))
return true;
break;
}
// fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
// fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
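// For illustration, with BitWidth = 32 and Amt = 8 for fshl: a demanded mask M
// on the result requires (M >> 8) from Op0 and (M << 24) from Op1.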
APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
Depth + 1))
return true;
if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
// For pow-2 bitwidths, the shift amount is used modulo BitWidth, so only its
// low log2(BitWidth) bits are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
Known2, TLO, Depth + 1))
return true;
}
break;
}
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// If we're rotating a 0/-1 value, then it stays a 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
// For pow-2 bitwidths, the rotate amount is used modulo BitWidth, so only its
// low log2(BitWidth) bits are demanded.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
}
break;
}
case ISD::UMIN: {
// Check if one arg is always less than (or equal) to the other arg.
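// For illustration: if the known bits prove Op0 <= 15 (bits 4 and above zero)
// and Op1 >= 16 (bit 4 known one), KnownBits::ule(Known0, Known1) is
// definitively true and the umin folds to Op0.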
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umin(Known0, Known1);
if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
break;
}
case ISD::UMAX: {
// Check if one arg is always greater than (or equal) to the other arg.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
Known = KnownBits::umax(Known0, Known1);
if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
break;
}
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.reverseBits();
Known.Zero = Known2.Zero.reverseBits();
break;
}
case ISD::BSWAP: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.byteSwap();
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
Known.One = Known2.One.byteSwap();
Known.Zero = Known2.Zero.byteSwap();
break;
}
case ISD::CTPOP: {
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If we only care about the highest bit, don't bother shifting right.
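// For illustration: (sext_inreg X, i8) in i32 with only the sign mask (bit 31)
// demanded is rebuilt as (shl X, 24), moving bit 7 of X into the sign position
// (unless X is already sufficiently sign extended, see below).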
if (DemandedBits.isSignMask()) {
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However, if the input is already sign extended, we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt =
TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
}
}
// If none of the extended bits are demanded, eliminate the sextinreg.
if (DemandedBits.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op0);
APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
Known.One.setBitsFrom(ExVTBits);
Known.Zero &= Mask;
} else { // Input sign bit unknown
Known.Zero &= Mask;
Known.One &= Mask;
}
break;
}
case ISD::BUILD_PAIR: {
EVT HalfVT = Op.getOperand(0).getValueType();
unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
KnownBits KnownLo, KnownHi;
if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
case ISD::ZERO_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative()) {
unsigned Opc =
IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND:
case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned InBits = SrcVT.getScalarSizeInBits();
unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
if (IsVecInReg && DemandedElts == 1 &&
VT.getSizeInBits() == SrcVT.getSizeInBits() &&
TLO.DAG.getDataLayout().isLittleEndian())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.anyext(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
switch (Src.getOpcode()) {
default:
break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
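// e.g. (trunc (srl i64 %x, 8) to i32) where bits 24-31 of the result are
// not demanded can become (srl (trunc %x to i32), 8).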
if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
const APInt *ShAmtC =
TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
if (!ShAmtC || ShAmtC->uge(BitWidth))
break;
uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
HighBits.lshrInPlace(ShVal);
HighBits = HighBits.trunc(BitWidth);
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewShAmt = TLO.DAG.getConstant(
ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
break;
}
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
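// e.g. for (AssertZext i32 %x, i8) we query the operand with bits 8-31 plus
// the demanded low bits, and can report bits 8-31 of the result as zero.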
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero |= ~InMask;
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
if (SrcEltCnt.isScalable())
return false;
// Demand the bits from every vector element without a constant index.
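// e.g. (extract_vector_elt <4 x i32> %v, 2) only needs element 2 of %v; a
// variable index forces all four elements to be treated as demanded.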
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
// If BitWidth > EltBitWidth the value is any-extended, so we do not know
// anything about the extended bits.
APInt DemandedSrcBits = DemandedBits;
if (BitWidth > EltBitWidth)
DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
Depth + 1))
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcBits.isAllOnesValue() ||
!DemandedSrcElts.isAllOnesValue()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
return TLO.CombineTo(Op, NewOp);
}
}
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.anyext(BitWidth);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
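// e.g. (bitcast f32 %x to i32) with only bit 31 demanded can become
// (shl (fgetsign %x), 31).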
if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
SrcVT.isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
SrcVT != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
// Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
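// e.g. for a little-endian (bitcast <4 x i16> %v to <2 x i32>), demanded
// result element j pulls in source elements 2*j and 2*j+1, with the low 16
// demanded bits mapping to the even element and the high 16 to the odd one.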
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
if (!Sub.isNullValue()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
DemandedSrcElts.setBit((j * Scale) + i);
}
}
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
} else if ((NumSrcEltBits % BitWidth) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
DemandedSrcBits.insertBits(DemandedBits, Offset);
DemandedSrcElts.setBit(i / Scale);
}
if (SrcVT.isVector()) {
APInt KnownSrcUndef, KnownSrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
KnownSrcZero, TLO, Depth + 1))
return true;
}
KnownBits KnownSrcBits;
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
}
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
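// e.g. if only the low 8 bits of an i32 add are demanded, only the low 8
// bits of each operand matter, since carries never propagate downwards.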
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0 || DemandedOp1) {
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
Op0 = DemandedOp0 ? DemandedOp0 : Op0;
Op1 = DemandedOp1 ? DemandedOp1 : Op1;
SDValue NewOp =
TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
}
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
// patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
// is probably not useful (and could be detrimental).
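// e.g. with only the low 8 bits demanded, (add %x, 255) can use -1 instead,
// i.e. (add %x, -1) == %x - 1, which feeds patterns such as x86's 'blsr'
// (x & (x - 1)).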
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
return TLO.CombineTo(Op, NewOp);
}
LLVM_FALLTHROUGH;
}
default:
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
Known, TLO, Depth))
return true;
break;
}
// Just use computeKnownBits to compute output bits.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
if (C->isOpaque())
return false;
}
if (VT.isInteger())
return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
if (VT.isFloatingPoint())
return TLO.CombineTo(
Op,
TLO.DAG.getConstantFP(
APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
return false;
}
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
APInt &KnownUndef,
APInt &KnownZero,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
if (Simplified) {
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
return Simplified;
}
/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
const APInt &UndefOp0,
const APInt &UndefOp1) {
EVT VT = BO.getValueType();
assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
"Vector binop only");
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
assert(UndefOp0.getBitWidth() == NumElts &&
UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
const APInt &UndefVals) {
if (UndefVals[Index])
return DAG.getUNDEF(EltVT);
if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
// Try hard to make sure that the getNode() call is not creating temporary
// nodes. Ignore opaque integers because they do not constant fold.
SDValue Elt = BV->getOperand(Index);
auto *C = dyn_cast<ConstantSDNode>(Elt);
if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
return Elt;
}
return SDValue();
};
APInt KnownUndef = APInt::getNullValue(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
// the vector.
// TODO: Ideally we would use FoldConstantArithmetic() here, but that does
// not handle FP constants. The code within getNode() should be refactored
// to avoid the danger of creating a bogus temporary node here.
SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
KnownUndef.setBit(i);
}
return KnownUndef;
}
bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
KnownUndef = KnownZero = APInt::getNullValue(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
return false;
assert(VT.getVectorNumElements() == NumElts &&
"Mask size mismatches value type element count!");
// Undef operand.
if (Op.isUndef()) {
KnownUndef.setAllBits();
return false;
}
// If Op has other users, assume that all elements are needed.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
DemandedElts.setAllBits();
// Not demanding any elements from Op.
if (DemandedElts == 0) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
// Limit search depth.
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
// Helper for demanding the specified elements and all the bits of both binary
// operands.
auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
TLO.DAG, Depth + 1);
SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
TLO.DAG, Depth + 1);
if (NewOp0 || NewOp1) {
SDValue NewOp = TLO.DAG.getNode(
Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
return TLO.CombineTo(Op, NewOp);
}
return false;
};
switch (Opcode) {
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0]) {
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
SDValue ScalarSrc = Op.getOperand(0);
if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue Src = ScalarSrc.getOperand(0);
SDValue Idx = ScalarSrc.getOperand(1);
EVT SrcVT = Src.getValueType();
ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
if (SrcEltCnt.isScalable())
return false;
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
if (isNullConstant(Idx)) {
APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
}
}
KnownUndef.setHighBits(NumElts - 1);
break;
}
case ISD::BITCAST: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
// We only handle vectors here.
// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
if (!SrcVT.isVector())
break;
// Fast handling of 'identity' bitcasts.
unsigned NumSrcElts = SrcVT.getVectorNumElements();
if (NumSrcElts == NumElts)
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
APInt SrcZero, SrcUndef;
APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
// When bitcasting from a 'large element' src vector to a 'small element'
// vector, we must demand a source element if any DemandedElt maps to it.
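// e.g. for (bitcast <2 x i64> %v to <4 x i32>), demanded output elements
// 0-1 map to source element 0 and elements 2-3 map to source element 1.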
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SrcDemandedElts.setBit(i / Scale);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
}
KnownBits Known;
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
TLO, Depth + 1))
return true;
}
// If the src element is zero/undef then all the output elements it covers
// will be too - only the demanded output elements are guaranteed to be
// correct.
for (unsigned i = 0; i != NumSrcElts; ++i) {
if (SrcDemandedElts[i]) {
if (SrcZero[i])
KnownZero.setBits(i * Scale, (i + 1) * Scale);
if (SrcUndef[i])
KnownUndef.setBits(i * Scale, (i + 1) * Scale);
}
}
}
// When bitcasting from a 'small element' src vector to a 'large element'
// vector, we demand all the smaller source elements covered by each larger
// demanded element of this vector.
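// e.g. for (bitcast <4 x i32> %v to <2 x i64>), demanding output element 1
// demands source elements 2 and 3.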
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i])
SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
// If all the src elements covering an output element are zero/undef, then
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
KnownZero.setBit(i);
if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
KnownUndef.setBit(i);
}
}
}
break;
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
if (!DemandedElts.isAllOnesValue()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
bool Updated = false;
for (unsigned i = 0; i != NumElts; ++i) {
if (!DemandedElts[i] && !Ops[i].isUndef()) {
Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
KnownUndef.setBit(i);
Updated = true;
}
}
if (Updated)
return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
}
}
for (unsigned i = 0; i != NumElts; ++i) {
SDValue SrcOp = Op.getOperand(i);
if (SrcOp.isUndef()) {
KnownUndef.setBit(i);
} else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
(isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
KnownZero.setBit(i);
}
}
break;
}
case ISD::CONCAT_VECTORS: {
EVT SubVT = Op.getOperand(0).getValueType();
unsigned NumSubVecs = Op.getNumOperands();
unsigned NumSubElts = SubVT.getVectorNumElements();
for (unsigned i = 0; i != NumSubVecs; ++i) {
SDValue SubOp = Op.getOperand(i);
APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
KnownUndef.insertBits(SubUndef, i * NumSubElts);
KnownZero.insertBits(SubZero, i * NumSubElts);
}
break;
}
case ISD::INSERT_SUBVECTOR: {
// Demand any elements from the subvector and the remainder from the src it
// is inserted into.
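// e.g. for (insert_subvector <8 x i32> %src, <4 x i32> %sub, 4), demanded
// elements 4-7 are routed to %sub and the remaining demanded elements to
// %src.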
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
// If none of the src operand elements are demanded, replace it with undef.
if (!DemandedSrcElts && !Src.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
TLO.DAG.getUNDEF(VT), Sub,
Op.getOperand(2)));
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
KnownUndef.insertBits(SubUndef, Idx);
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedSrcElts.isAllOnesValue() ||
!DemandedSubElts.isAllOnesValue()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
Sub, DemandedSubElts, TLO.DAG, Depth + 1);
if (NewSrc || NewSub) {
NewSrc = NewSrc ? NewSrc : Src;
NewSub = NewSub ? NewSub : Sub;
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
NewSub, Op.getOperand(2));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
if (Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
KnownUndef = SrcUndef.extractBits(NumElts, Idx);
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!DemandedElts.isAllOnesValue()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
Op.getOperand(1));
return TLO.CombineTo(Op, NewOp);
}
}
break;
}
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
// For a legal, constant insertion index, if we don't need this insertion
// then strip it, else remove it from the demanded elts.
if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
unsigned Idx = CIdx->getZExtValue();
if (!DemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
APInt DemandedVecElts(DemandedElts);
DemandedVecElts.clearBit(Idx);
if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
KnownUndef.setBitVal(Idx, Scl.isUndef());
KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
break;
}
APInt VecUndef, VecZero;
if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
Depth + 1))
return true;
// Without knowing the insertion index we can't set KnownUndef/KnownZero.
break;
}
case ISD::VSELECT: {
// Try to transform the select condition based on the current demanded
// elements.
// TODO: If a condition element is undef, we can choose from one arm of the
// select (and if one arm is undef, then we can propagate that to the
// result).
// TODO - add support for constant vselect masks (see IR version of this).
APInt UnusedUndef, UnusedZero;
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
UnusedZero, TLO, Depth + 1))
return true;
// See if we can simplify either vselect operand.
APInt DemandedLHS(DemandedElts);
APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
ZeroLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
ZeroRHS, TLO, Depth + 1))
return true;
KnownUndef = UndefLHS & UndefRHS;
KnownZero = ZeroLHS & ZeroRHS;
break;
}
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
// Collect demanded elements from the shuffle operands.
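// e.g. for a <4 x i32> shuffle with mask <0, 5, 2, 7>, demanded result
// elements 0 and 2 demand LHS elements 0 and 2, while result elements 1 and
// 3 demand RHS elements 1 and 3.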
APInt DemandedLHS(NumElts, 0);
APInt DemandedRHS(NumElts, 0);
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0 || !DemandedElts[i])
continue;
assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
if (M < (int)NumElts)
DemandedLHS.setBit(M);
else
DemandedRHS.setBit(M - NumElts);
}
// See if we can simplify either shuffle operand.
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
ZeroLHS, TLO, Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
ZeroRHS, TLO, Depth + 1))
return true;
// Simplify mask using undef elements from LHS/RHS.
bool Updated = false;
bool IdentityLHS = true, IdentityRHS = true;
SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
for (unsigned i = 0; i != NumElts; ++i) {
int &M = NewMask[i];
if (M < 0)
continue;
if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
(M >= (int)NumElts && UndefRHS[M - NumElts])) {
Updated = true;
M = -1;
}
IdentityLHS &= (M < 0) || (M == (int)i);
IdentityRHS &= (M < 0) || ((M - NumElts) == i);
}
// Update legal shuffle masks based on demanded elements if it won't reduce
// to an identity mask, which could cause premature removal of the shuffle.
if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
SDValue LegalShuffle =
buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
NewMask, TLO.DAG);
if (LegalShuffle)
return TLO.CombineTo(Op, LegalShuffle);
}
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (M < 0) {
KnownUndef.setBit(i);
} else if (M < (int)NumElts) {
if (UndefLHS[M])
KnownUndef.setBit(i);
if (ZeroLHS[M])
KnownZero.setBit(i);
} else {
if (UndefRHS[M - NumElts])
KnownUndef.setBit(i);
if (ZeroRHS[M - NumElts])
KnownZero.setBit(i);
}
}
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
// aext - if we just need the bottom element then we can bitcast.
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
}
if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
}
break;
}
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
Depth + 1))
return true;
KnownZero = ZeroLHS & ZeroRHS;
KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
case ISD::ROTL:
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
Depth + 1))
return true;
KnownZero = ZeroLHS;
KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::MUL:
case ISD::AND: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
// and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnesValue())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
}
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
if (Op.getOpcode() == ISD::ZERO_EXTEND) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
}
break;
default: {
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
KnownZero, TLO, Depth))
return true;
} else {
KnownBits Known;
APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
}
break;
}
}
assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
// Constant fold all undef cases.
// TODO: Handle zero cases as well.
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
}
/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
}
void TargetLowering::computeKnownBitsForTargetInstr(
GISelKnownBits &Analysis, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
Known.resetAll();
}
void TargetLowering::computeKnownBitsForFrameIndex(
const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
// The low bits are known zero if the pointer is aligned.
Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}
Align TargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
return Align(1);
}
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use ComputeNumSignBits if you don't know whether Op"
" is a target node!");
return 1;
}
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
const MachineRegisterInfo &MRI, unsigned Depth) const {
return 1;
}
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyDemandedVectorElts if you don't know whether Op"
" is a target node!");
return false;
}
bool TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyDemandedBits if you don't know whether Op"
" is a target node!");
computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
return false;
}
SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
assert(
(Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
" is a target node!");
return SDValue();
}
SDValue
TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
SDValue N1, MutableArrayRef<int> Mask,
SelectionDAG &DAG) const {
bool LegalMask = isShuffleMaskLegal(Mask, VT);
if (!LegalMask) {
std::swap(N0, N1);
ShuffleVectorSDNode::commuteMask(Mask);
LegalMask = isShuffleMaskLegal(Mask, VT);
}
if (!LegalMask)
return SDValue();
return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
}
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
bool PoisonOnly, unsigned Depth) const {
assert(
(Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
" is a target node!");
return false;
}
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use isKnownNeverNaN if you don't know whether Op"
" is a target node!");
return false;
}
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
APInt CVal;
if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
CVal = CN->getAPIntValue();
} else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
auto *CN = BV->getConstantSplatNode();
if (!CN)
return false;
// If this is a truncating build vector, truncate the splat value.
// Otherwise, we may fail to match the expected values below.
unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
CVal = CN->getAPIntValue();
if (BVEltWidth < CVal.getBitWidth())
CVal = CVal.trunc(BVEltWidth);
} else {
return false;
}
switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
return CVal.isOneValue();
case ZeroOrNegativeOneBooleanContent:
return CVal.isAllOnesValue();
}
llvm_unreachable("Invalid boolean contents");
}
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
// Only interested in constant splats; we don't care about undef
// elements when identifying boolean constants, and getConstantSplatNode
// returns NULL if all ops are undef.
CN = BV->getConstantSplatNode();
if (!CN)
return false;
}
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
return CN->isNullValue();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
bool SExt) const {
if (VT == MVT::i1)
return N->isOne();
TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
switch (Cnt) {
case TargetLowering::ZeroOrOneBooleanContent:
// An extended value of 1 is always true, unless its original type is i1,
// in which case it will be sign extended to -1.
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
return N->isAllOnesValue() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
DAGCombinerInfo &DCI) const {
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
std::swap(N0, N1);
EVT OpVT = N0.getValueType();
if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
return SDValue();
SDValue X, Y;
if (N0.getOperand(0) == N1) {
X = N0.getOperand(1);
Y = N0.getOperand(0);
} else if (N0.getOperand(1) == N1) {
X = N0.getOperand(0);
Y = N0.getOperand(1);
} else {
return SDValue();
}
SelectionDAG &DAG = DCI.DAG;
SDValue Zero = DAG.getConstant(0, DL, OpVT);
if (DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
// equivalent when Y == 0.
assert(OpVT.isInteger());
Cond = ISD::getSetCCInverse(Cond, OpVT);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType()))
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
} else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
// If the target supports an 'and-not' or 'and-complement' logic operation,
// try to use that to make a comparison operation more efficient.
// But don't do this transform if the mask is a single bit because there are
// more efficient ways to deal with that case (for example, 'bt' on x86 or
// 'rlwinm' on PPC).
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
if (YConst && YConst->isNullValue())
return SDValue();
// Transform this into: ~X & Y == 0.
SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
}
return SDValue();
}
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at the IR level may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x); that will be constant-folded to true/false.
/// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
/// ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
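/// For example, (setult (add i16 %x, 128), 256) unfolds to
/// (seteq (sra (shl %x, 8), 8), %x).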
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
const SDLoc &DL) const {
// We must be comparing with a constant.
ConstantSDNode *C1;
if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
return SDValue();
// N0 should be: add %x, (1 << (KeptBits-1))
if (N0->getOpcode() != ISD::ADD)
return SDValue();
// And we must be 'add'ing a constant.
ConstantSDNode *C01;
if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
return SDValue();
SDValue X = N0->getOperand(0);
EVT XVT = X.getValueType();
// Validate constants ...
APInt I1 = C1->getAPIntValue();
ISD::CondCode NewCond;
if (Cond == ISD::CondCode::SETULT) {
NewCond = ISD::CondCode::SETEQ;
} else if (Cond == ISD::CondCode::SETULE) {
NewCond = ISD::CondCode::SETEQ;
// But need to 'canonicalize' the constant.
I1 += 1;
} else if (Cond == ISD::CondCode::SETUGT) {
NewCond = ISD::CondCode::SETNE;
// But need to 'canonicalize' the constant.
I1 += 1;
} else if (Cond == ISD::CondCode::SETUGE) {
NewCond = ISD::CondCode::SETNE;
} else
return SDValue();
APInt I01 = C01->getAPIntValue();
auto checkConstants = [&I1, &I01]() -> bool {
// Both constants must be powers of two, and the one from the setcc must be
// bigger.
return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
};
if (checkConstants()) {
// Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
} else {
// What if we invert constants? (and the target predicate)
I1.negate();
I01.negate();
assert(XVT.isInteger());
NewCond = getSetCCInverse(NewCond, XVT);
if (!checkConstants())
return SDValue();
// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
}
// They are power-of-two, so which bit is set?
const unsigned KeptBits = I1.logBase2();
const unsigned KeptBitsMinusOne = I01.logBase2();
// Magic!
if (KeptBits != (KeptBitsMinusOne + 1))
return SDValue();
assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
// We don't want to do this in every single case.
SelectionDAG &DAG = DCI.DAG;
if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
XVT, KeptBits))
return SDValue();
const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
// Unfold into: ((%x << C) a>> C) cond %x
// Where 'cond' will be either 'eq' or 'ne'.
SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
return T2;
}
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
unsigned NewShiftOpcode;
SDValue X, C, Y;
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Look for '(C l>>/<< Y)'.
auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
// The shift should be one-use.
if (!V.hasOneUse())
return false;
unsigned OldShiftOpcode = V.getOpcode();
switch (OldShiftOpcode) {
case ISD::SHL:
NewShiftOpcode = ISD::SRL;
break;
case ISD::SRL:
NewShiftOpcode = ISD::SHL;
break;
default:
return false; // must be a logical shift.
}
// We should be shifting a constant.
// FIXME: best to use isConstantOrConstantVector().
C = V.getOperand(0);
ConstantSDNode *CC =
isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
if (!CC)
return false;
Y = V.getOperand(1);
ConstantSDNode *XC =
isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
};
// The LHS of the comparison should be a one-use 'and'.
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
return SDValue();
X = N0.getOperand(0);
SDValue Mask = N0.getOperand(1);
// 'and' is commutative!
if (!Match(Mask)) {
std::swap(X, Mask);
if (!Match(Mask))
return SDValue();
}
EVT VT = X.getValueType();
// Produce:
// ((X 'OppositeShiftOpcode' Y) & C) Cond 0
SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
return T2;
}
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, const SDLoc &DL,
DAGCombinerInfo &DCI) const {
unsigned BOpcode = N0.getOpcode();
assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
"Unexpected binop");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
// (X + Y) == X --> Y == 0
// (X - Y) == X --> Y == 0
// (X ^ Y) == X --> Y == 0
SelectionDAG &DAG = DCI.DAG;
EVT OpVT = N0.getValueType();
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);
if (X == N1)
return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
if (Y != N1)
return SDValue();
// (X + Y) == Y --> X == 0
// (X ^ Y) == Y --> X == 0
if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
// The shift would not be valid if the operands are boolean (i1).
if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
return SDValue();
// (X - Y) == Y --> X == Y << 1
EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
!DCI.isBeforeLegalize());
SDValue One = DAG.getConstant(1, DL, ShiftVT);
SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(YShl1.getNode());
return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
SDValue N0, const APInt &C1,
ISD::CondCode Cond, const SDLoc &dl,
SelectionDAG &DAG) {
// Look through truncs that don't change the value of a ctpop.
// FIXME: Add vector support? Need to be careful with setcc result type below.
SDValue CTPOP = N0;
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
CTPOP = N0.getOperand(0);
if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
return SDValue();
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
// If this is a vector CTPOP, keep the CTPOP if it is legal.
// TODO: Should we check if CTPOP is legal(or custom) for scalars?
if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
return SDValue();
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
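// Each x &= (x - 1) step below clears the lowest set bit, so after N steps
// the value is zero iff (ctpop x) u<= N.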
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
return SDValue();
if (C1 == 0 && (Cond == ISD::SETULT))
return SDValue(); // This is handled elsewhere.
unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
SDValue Result = CTOp;
for (unsigned i = 0; i < Passes; i++) {
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
}
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
// If ctpop is not supported, expand a power-of-2 comparison based on it.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
// For scalars, keep CTPOP if it is legal or custom.
if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
return SDValue();
// This is based on X86's custom lowering for CTPOP which produces more
// instructions than the expansion here.
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
}
return SDValue();
}
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, bool foldBooleans,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
// Constant fold or commute setcc.
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
return Fold;
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isConstOrConstSplat(N0) &&
(!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
// If we have a subtract with the same 2 non-constant operands as this setcc
// -- but in reverse order -- then try to commute the operands of this setcc
// to match. A matching pair of setcc (cmp) and sub may be combined into 1
// instruction on some targets.
if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
!DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
// Optimize some CTPOP cases.
if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
return V;
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
// (srl (ctlz x), 5) == 0 -> X != 0
// (srl (ctlz x), 5) != 1 -> X != 0
Cond = ISD::SETNE;
} else {
// (srl (ctlz x), 5) != 0 -> X == 0
// (srl (ctlz x), 5) == 1 -> X == 0
Cond = ISD::SETEQ;
}
SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
Cond);
}
}
}
}
// FIXME: Support vectors.
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
// (zext x) == C --> x == (trunc C)
// (sext x) == C --> x == (trunc C)
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
DCI.isBeforeLegalize() && N0->hasOneUse()) {
unsigned MinBits = N0.getValueSizeInBits();
SDValue PreExt;
bool Signed = false;
if (N0->getOpcode() == ISD::ZERO_EXTEND) {
// ZExt
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
} else if (N0->getOpcode() == ISD::AND) {
// DAGCombine turns costly ZExts into ANDs
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
MinBits = C->getAPIntValue().countTrailingOnes();
PreExt = N0->getOperand(0);
}
} else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
// SExt
MinBits = N0->getOperand(0).getValueSizeInBits();
PreExt = N0->getOperand(0);
Signed = true;
} else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
// ZEXTLOAD / SEXTLOAD
if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
MinBits = LN0->getMemoryVT().getSizeInBits();
PreExt = N0;
} else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
Signed = true;
MinBits = LN0->getMemoryVT().getSizeInBits();
PreExt = N0;
}
}
// Figure out how many bits we need to preserve this constant.
unsigned ReqdBits = Signed ?
C1.getBitWidth() - C1.getNumSignBits() + 1 :
C1.getActiveBits();
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
MinBits < C1.getBitWidth() &&
MinBits >= ReqdBits) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
if (MinBits == 1 && C1 == 1)
// Invert the condition.
return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
// If truncating the setcc operands is not desirable, we can still
// simplify the expression in some cases:
// setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
// setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
// setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
// setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
// setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
// setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
SDValue TopSetCC = N0->getOperand(0);
unsigned N0Opc = N0->getOpcode();
bool SExt = (N0Opc == ISD::SIGN_EXTEND);
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
TopSetCC.getOpcode() == ISD::SETCC &&
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
(!N1C->isNullValue() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
ISD::CondCode InvCond = ISD::getSetCCInverse(
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
TopSetCC.getOperand(0).getValueType());
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
}
}
}
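// For example, with an 8-bit source: (setcc (zext i8 %x to i32), 42, eq) has
// MinBits = 8 and ReqdBits = activeBits(42) = 6, so it becomes
// (setcc %x, i8 42, eq); with MinBits == 1 and C1 == 1 the compare is instead
// inverted against 0.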
// If the LHS is '(and load, const)', the RHS is 0, the test is for
// equality or unsigned, and all 1 bits of the const are in the same
// partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
!ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
N0.getNode()->hasOneUse() &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(0).getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
const APInt &Mask = N0.getConstantOperandAPInt(1);
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if (Mask.isSubsetOf(newMask)) {
if (Layout.isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
bestOffset = (origWidth/width - offset - 1) * (width/8);
bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
}
newMask <<= width;
}
}
}
if (bestWidth) {
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound() &&
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
Ptr =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
SDValue NewLoad =
DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
Lod->getOriginalAlign());
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
dl, newVT)),
DAG.getConstant(0LL, dl, newVT), Cond);
}
}
}
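// For example, on a little-endian target, (and (load i32 %p), 0x00FF0000) == 0
// keeps narrowing until bestWidth = 8: the mask covers only the third byte, so
// bestOffset = 2 and bestMask = 0xFF, and the test becomes
// (and (load i8, %p + 2), 0xFF) == 0.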
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
unsigned InSize = N0.getOperand(0).getValueSizeInBits();
// If the comparison constant has bits in the upper part, the
// zero-extended value could never match.
if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
C1.getBitWidth() - InSize))) {
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETEQ:
return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETNE:
return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
return DAG.getConstant(C1.isNegative(), dl, VT);
case ISD::SETLT:
case ISD::SETLE:
// True if the sign bit of C1 isn't set.
return DAG.getConstant(C1.isNonNegative(), dl, VT);
default:
break;
}
}
// Otherwise, we can perform the comparison with the low bits.
switch (Cond) {
case ISD::SETEQ:
case ISD::SETNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE: {
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
NewConst, Cond);
return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
}
break;
}
default:
break; // TODO: Be more careful with signed comparisons.
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
!isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
OpVT)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
assert(ExtDstTy == N0.getOperand(0).getValueType() &&
ExtDstTy != ExtSrcTy && "Unexpected types!");
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
DAG.getConstant(Imm, dl, ExtDstTy));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
} else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
(N0.getValueType() == MVT::i1 ||
getBooleanContents(N0.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent)) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isOneConstant(N0.getOperand(1))) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
if (DAG.MaskedValueIsZero(N0,
APInt::getHighBitsSet(BitWidth,
BitWidth-1))) {
// Okay, get the un-inverted input value.
SDValue Val;
if (N0.getOpcode() == ISD::XOR) {
Val = N0.getOperand(0);
} else {
assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
N0.getOperand(0).getOperand(0),
N0.getOperand(1));
}
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
} else if (N1C->isOne()) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if ((Op0.getOpcode() == ISD::XOR) &&
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
SDValue XorLHS = Op0.getOperand(0);
SDValue XorRHS = Op0.getOperand(1);
// Ensure that the input setccs return an i1 type or 0/1 value.
if (Op0.getValueType() == MVT::i1 ||
(getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent &&
getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
ZeroOrOneBooleanContent)) {
// (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
}
}
if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
DAG.getConstant(1, dl, VT));
else if (Op0.getValueType().bitsLT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
DAG.getConstant(1, dl, VT));
return DAG.getSetCC(dl, VT, Op0,
DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
if (Op0.getOpcode() == ISD::AssertZext &&
cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
return DAG.getSetCC(dl, VT, Op0,
DAG.getConstant(0, dl, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
// Given:
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
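// For example, if known bits prove %x is either 0 or a single power of two
// while %y has at least two bits set, then %y cannot divide a nonzero %x
// (the only divisors of 2^k are powers of two), so the remainder is zero
// exactly when %x itself is zero.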
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
}
// These simplifications apply to splat vectors as well.
// TODO: Handle more splat vector cases.
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
MinVal = APInt::getSignedMinValue(OperandBitSize);
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
} else {
MinVal = APInt::getMinValue(OperandBitSize);
MaxVal = APInt::getMaxValue(OperandBitSize);
}
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
// X >= MIN --> true
if (C1 == MinVal)
return DAG.getBoolConstant(true, dl, VT, OpVT);
if (!VT.isVector()) { // TODO: Support this for vectors.
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
if ((DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
(!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
// X <= MAX --> true
if (C1 == MaxVal)
return DAG.getBoolConstant(true, dl, VT, OpVT);
// X <= C0 --> X < (C0 + 1)
if (!VT.isVector()) { // TODO: Support this for vectors.
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
if ((DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
(!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
isLegalICmpImmediate(C.getSExtValue())))) {
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C, dl, N1.getValueType()),
NewCC);
}
}
}
if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
if (C1 == MinVal)
return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// Canonicalize setlt X, Max --> setne X, Max
if (C1 == MaxVal)
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
// If we have setult X, 1, turn it into seteq X, 0
if (C1 == MinVal+1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MinVal, dl, N0.getValueType()),
ISD::SETEQ);
}
}
if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
if (C1 == MaxVal)
return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// Canonicalize setgt X, Min --> setne X, Min
if (C1 == MinVal)
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
// If we have setugt X, Max-1, turn it into seteq X, Max
if (C1 == MaxVal-1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, dl, N0.getValueType()),
ISD::SETEQ);
}
}
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
if (C1.isNullValue())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
// For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
bool CmpZero = N1C->getAPIntValue().isNullValue();
bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
unsigned EltBits = V.getScalarValueSizeInBits();
if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
return false;
SDValue LHS = V.getOperand(0);
SDValue RHS = V.getOperand(1);
APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
// Unshifted element must have zero upper bits.
if (RHS.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(RHS.getOperand(1)) &&
RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
DAG.MaskedValueIsZero(LHS, HiBits)) {
Lo = LHS;
Hi = RHS.getOperand(0);
return true;
}
if (LHS.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
DAG.MaskedValueIsZero(RHS, HiBits)) {
Lo = RHS;
Hi = LHS.getOperand(0);
return true;
}
return false;
};
auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
unsigned EltBits = N0.getScalarValueSizeInBits();
unsigned HalfBits = EltBits / 2;
APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
SDValue NewN0 =
DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
};
SDValue Lo, Hi;
if (IsConcat(N0, Lo, Hi))
return MergeConcat(Lo, Hi);
if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
SDValue Lo0, Lo1, Hi0, Hi1;
if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
IsConcat(N0.getOperand(1), Lo1, Hi1)) {
return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
}
}
}
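// For example, for an i64 %x whose high 32 bits are known zero:
//   (%x | (%y << 32)) ==  0  -->  (%x | (%y & 0xFFFFFFFF)) == 0
//   (%x | (%y << 32)) == -1  -->  (%x & (%y & 0xFFFFFFFF)) == 0xFFFFFFFF
// i.e. the shift disappears and only a half-width mask remains.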
}
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// SETUGT X, SINTMAX -> SETLT X, 0
// SETUGE X, SINTMIN -> SETLT X, 0
if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
(Cond == ISD::SETUGE && C1.isMinSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, dl, N1.getValueType()),
ISD::SETLT);
// SETULT X, SINTMIN -> SETGT X, -1
// SETULE X, SINTMAX -> SETGT X, -1
if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
(Cond == ISD::SETULE && C1.isMaxSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getAllOnesConstant(dl, N1.getValueType()),
ISD::SETGT);
}
}
// Back to non-vector simplifications.
// TODO: Can we do these for vector splats?
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const APInt &C1 = N1C->getAPIntValue();
EVT ShValTy = N0.getValueType();
// Fold bit comparisons when we can. This will result in an
// incorrect value when boolean false is negative one, unless
// the bitsize is 1 in which case the false value is the same
// in practice regardless of the representation.
if ((VT.getSizeInBits() == 1 ||
getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
EVT ShiftTy =
getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
}
}
}
}
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl, ShiftTy));
SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
}
}
}
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
Cond == ISD::SETULE || Cond == ISD::SETUGT) {
bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
// X < 0x100000000 -> (X >> 32) < 1
// X >= 0x100000000 -> (X >> 32) >= 1
// X <= 0x0ffffffff -> (X >> 32) < 1
// X > 0x0ffffffff -> (X >> 32) >= 1
unsigned ShiftBits;
APInt NewC = C1;
ISD::CondCode NewCond = Cond;
if (AdjOne) {
ShiftBits = C1.countTrailingOnes();
NewC = NewC + 1;
NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
} else {
ShiftBits = C1.countTrailingZeros();
}
NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue()) &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
}
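// For example, if 256 is not a legal compare immediate on the target:
//   (X & 0xFFFFFF00) == 256  -->  (X >> 8) == 1    (ShiftBits = 8)
//   X u< 0x100000000 (i64)   -->  (X >> 32) u< 1   (ShiftBits = 32)
// so only the small immediate 1 needs to be materialized.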
}
if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
auto *CFP = cast<ConstantFPSDNode>(N1);
assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
// materialize 0.0.
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
// setcc (fneg x), C -> setcc swap(pred) x, -C
if (N0.getOpcode() == ISD::FNEG) {
ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
}
}
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
// If the comparison was an awkward floating-point == or != and one of
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
bool IsNegInf = CFP->getValueAPF().isNegative();
ISD::CondCode NewCond = ISD::SETCC_INVALID;
switch (Cond) {
case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
default: break;
}
if (NewCond != ISD::SETCC_INVALID &&
isCondCodeLegal(NewCond, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
}
}
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
assert(!N0.getValueType().isInteger() &&
"Integer types should be handled by FoldSetCC");
bool EqTrue = ISD::isTrueWhenEqual(Cond);
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
if (UOF == unsigned(EqTrue))
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getValueType().isInteger()) {
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
N0.getOpcode() == ISD::XOR) {
// Simplify (X+Y) == (X+Z) --> Y == Z
if (N0.getOpcode() == N1.getOpcode()) {
if (N0.getOperand(0) == N1.getOperand(0))
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
if (N0.getOperand(1) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
if (isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
// If RHS is a legal immediate value for a compare instruction, we need
// to be careful about increasing register pressure needlessly.
bool LegalRHSImm = false;
if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(RHSC->getAPIntValue()-
LHSR->getAPIntValue(),
dl, N0.getValueType()), Cond);
}
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
// performing the inversion.
if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
return
DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(LHSR->getAPIntValue() ^
RHSC->getAPIntValue(),
dl, N0.getValueType()),
Cond);
}
// Turn (C1-X) == C2 --> X == C1-C2
if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
return
DAG.getSetCC(dl, VT, N0.getOperand(1),
DAG.getConstant(SUBC->getAPIntValue() -
RHSC->getAPIntValue(),
dl, N0.getValueType()),
Cond);
}
}
// Could RHSC fold directly into a compare?
if (RHSC->getValueType(0).getSizeInBits() <= 64)
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// (X+Y) == X --> Y == 0 and similar folds.
// Don't do this if X is an immediate that can fold into a cmp
// instruction and X+Y has other uses. It could be an induction variable
// chain, and the transform would increase register pressure.
if (!LegalRHSImm || N0.hasOneUse())
if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
return V;
}
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
N1.getOpcode() == ISD::XOR)
if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
return V;
if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
return V;
}
// Fold remainder of division by a constant.
if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
} else if (N0.getOpcode() == ISD::SREM) {
if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
}
}
}
// Fold away ALL boolean setcc's.
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
SDValue Temp;
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
N0 = DAG.getNOT(dl, Temp, OpVT);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETNE: // X != Y --> (X^Y)
N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
break;
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
Temp = DAG.getNOT(dl, N0, OpVT);
N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
Temp = DAG.getNOT(dl, N1, OpVT);
N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
Temp = DAG.getNOT(dl, N0, OpVT);
N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
Temp = DAG.getNOT(dl, N1, OpVT);
N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
break;
}
if (VT.getScalarType() != MVT::i1) {
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(N0.getNode());
// FIXME: If running after legalize, we probably can't do this.
ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
N0 = DAG.getNode(ExtendCode, dl, VT, N0);
}
return N0;
}
// Could not fold it.
return SDValue();
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
int64_t &Offset) const {
SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
GA = GASD->getGlobal();
Offset += GASD->getOffset();
return true;
}
if (N->getOpcode() == ISD::ADD) {
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
Offset += V->getSExtValue();
return true;
}
} else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
Offset += V->getSExtValue();
return true;
}
}
}
return false;
}
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
return SDValue();
}
//===----------------------------------------------------------------------===//
// Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//
TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
unsigned S = Constraint.size();
if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'r':
return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
return C_Immediate;
case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case '<':
case '>':
return C_Other;
}
}
if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
return C_Memory;
return C_Register;
}
return C_Unknown;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isInteger())
return "r";
if (ConstraintVT.isFloatingPoint())
return "f"; // works for many targets
return nullptr;
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
SDValue &Chain, SDValue &Flag, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
if (Op.getOpcode() == ISD::BasicBlock ||
Op.getOpcode() == ISD::TargetBlockAddress) {
Ops.push_back(Op);
return;
}
LLVM_FALLTHROUGH;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
GlobalAddressSDNode *GA;
ConstantSDNode *C;
BlockAddressSDNode *BA;
uint64_t Offset = 0;
// Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
// etc., since getelementptr is variadic. We can't use
// SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
// while in this case the GA may be furthest from the root node which is
// likely an ISD::ADD.
while (1) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0),
Offset + GA->getOffset()));
return;
}
if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
BooleanContent BCont = getBooleanContents(MVT::i64);
ISD::NodeType ExtOpc =
IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
int64_t ExtVal =
ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
Ops.push_back(
DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
return;
}
if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
Ops.push_back(DAG.getTargetBlockAddress(
BA->getBlockAddress(), BA->getValueType(0),
Offset + BA->getOffset(), BA->getTargetFlags()));
return;
}
const unsigned OpCode = Op.getOpcode();
if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
Op = Op.getOperand(1);
// Subtraction is not commutative.
else if (OpCode == ISD::ADD &&
(C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
Op = Op.getOperand(0);
else
return;
Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
continue;
}
return;
}
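// For example, Op = (add (add GA, 8), 4) with constraint 'i' walks this loop:
// the constants accumulate into Offset (4, then 12) until the bare
// GlobalAddressSDNode is reached, at which point a TargetGlobalAddress with
// offset GA->getOffset() + 12 is emitted.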
break;
}
}
}
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
std::pair<unsigned, const TargetRegisterClass *> R =
std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
// Figure out which register class contains this reg.
for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
if (!isLegalRC(*RI, *RC))
continue;
for (const MCPhysReg &PR : *RC) {
if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
std::pair<unsigned, const TargetRegisterClass *> S =
std::make_pair(PR, RC);
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
// if no other is found which explicitly has the requested type.
if (RI->isTypeLegalForClass(*RC, VT))
return S;
if (!R.second)
R = S;
}
}
}
return R;
}
//===----------------------------------------------------------------------===//
// Constraint Selection.
/// Return true if this is an input operand that is a matching constraint like
/// "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
assert(!ConstraintCode.empty() && "No known constraint!");
return atoi(ConstraintCode.c_str());
}
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
const CallBase &Call) const {
/// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
ConstraintOperands.emplace_back(std::move(CI));
AsmOperandInfo &OpInfo = ConstraintOperands.back();
// Update multiple alternative constraint count.
if (OpInfo.multipleAlternatives.size() > maCount)
maCount = OpInfo.multipleAlternatives.size();
OpInfo.ConstraintVT = MVT::Other;
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
}
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT =
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT =
getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
break;
case InlineAsm::isInput:
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
if (OpInfo.CallOperandVal) {
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
// Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
if (StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
OpTy = STy->getElementType(0);
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
unsigned BitSize = DL.getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
case 8:
case 16:
case 32:
case 64:
case 128:
OpInfo.ConstraintVT =
MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
} else {
OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
}
}
// If we have multiple alternative constraints, select the best alternative.
if (!ConstraintOperands.empty()) {
if (maCount) {
unsigned bestMAIndex = 0;
int bestWeight = -1;
// weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
int weight = -1;
unsigned maIndex;
// Compute the sums of the weights for each alternative, keeping track
// of the best (highest weight) one so far.
for (maIndex = 0; maIndex < maCount; ++maIndex) {
int weightSum = 0;
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
if (OpInfo.Type == InlineAsm::isClobber)
continue;
// If this is an output operand with a matching input operand,
// look up the matching input. If their types mismatch, e.g. one
// is an integer, the other is floating point, or their sizes are
// different, flag it as an maCantMatch.
if (OpInfo.hasMatchingInput()) {
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
weightSum = -1; // Can't match.
break;
}
}
}
weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
if (weight == -1) {
weightSum = -1;
break;
}
weightSum += weight;
}
// Update best.
if (weightSum > bestWeight) {
bestWeight = weightSum;
bestMAIndex = maIndex;
}
}
// Now select chosen alternative in each constraint.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
if (cInfo.Type == InlineAsm::isClobber)
continue;
cInfo.selectAlternative(bestMAIndex);
}
}
}
// Check and hook up tied operands, choose constraint code to use.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
cIndex != eIndex; ++cIndex) {
AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
}
}
}
return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
case TargetLowering::C_Register:
return 1;
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
return 3;
}
llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
AsmOperandInfo &info, int maIndex) const {
InlineAsm::ConstraintCodeVector *rCodes;
if (maIndex >= (int)info.multipleAlternatives.size())
rCodes = &info.Codes;
else
rCodes = &info.multipleAlternatives[maIndex].Codes;
ConstraintWeight BestWeight = CW_Invalid;
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
ConstraintWeight weight =
getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
if (weight > BestWeight)
BestWeight = weight;
}
return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
switch (*constraint) {
case 'i': // immediate integer.
case 'n': // immediate integer with a known value.
if (isa<ConstantInt>(CallOperandVal))
weight = CW_Constant;
break;
case 's': // non-explicit integral immediate.
if (isa<GlobalValue>(CallOperandVal))
weight = CW_Constant;
break;
case 'E': // immediate float if host format.
case 'F': // immediate float.
if (isa<ConstantFP>(CallOperandVal))
weight = CW_Constant;
break;
case '<': // memory operand with autodecrement.
case '>': // memory operand with autoincrement.
case 'm': // memory operand.
case 'o': // offsettable memory operand
case 'V': // non-offsettable memory operand
weight = CW_Memory;
break;
case 'r': // general register.
case 'g': // general register, memory operand or immediate integer.
// note: Clang converts "g" to "imr".
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
break;
case 'X': // any operand.
default:
weight = CW_Default;
break;
}
return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
/// Other -> immediates and magic values
/// Register -> one specific register
/// RegisterClass -> a group of regs
/// Memory -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
/// 1) If there is an 'other' constraint, and if the operand is valid for
/// that constraint, use it. This makes us take advantage of 'i'
/// constraints when available.
/// 2) Otherwise, pick the most general constraint present. This prefers
/// 'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
const TargetLowering &TLI,
SDValue Op, SelectionDAG *DAG) {
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
unsigned BestIdx = 0;
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
int BestGenerality = -1;
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
// Indirect 'other' or 'immediate' constraints are not allowed.
if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
CType == TargetLowering::C_Register ||
CType == TargetLowering::C_RegisterClass))
continue;
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
// load of a register), otherwise we must use 'r'.
if ((CType == TargetLowering::C_Other ||
CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;
BestIdx = i;
break;
}
}
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
BestType = CType;
BestIdx = i;
BestGenerality = Generality;
}
}
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
// that matches labels). For Functions, the type here is the type of
// the result, which is not what we want to look at; leave them alone.
Value *v = OpInfo.CallOperandVal;
if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
OpInfo.CallOperandVal = v;
return;
}
if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
return;
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
OpInfo.ConstraintCode = Repl;
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
}
}
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
bool UseSRA = false;
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
}
// Calculate the multiplicative inverse, using Newton's method.
APInt t;
APInt Factor = Divisor;
while ((t = Divisor * Factor) != 1)
Factor *= APInt(Divisor.getBitWidth(), 2) - t;
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
return true;
};
// Collect all magic values from the build vector.
if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
return SDValue();
SDValue Shift, Factor;
if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
Factor = DAG.getBuildVector(VT, dl, Factors);
} else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(Shifts.size() == 1 && Factors.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
Factor = DAG.getSplatVector(VT, dl, Factors[0]);
} else {
assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
Shift = Shifts[0];
Factor = Factors[0];
}
SDValue Res = Op0;
// Shift the value upfront if it is even, so the LSB is one.
if (UseSRA) {
// TODO: For UDIV use SRL instead of SRA.
SDNodeFlags Flags;
Flags.setExact(true);
Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
Created.push_back(Res.getNode());
}
return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
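// A standalone sketch (not part of TargetLowering) of the pattern built above,
// assuming 32-bit lanes and a divisor of 6: strip the divisor's trailing zero
// with an exact arithmetic shift, then multiply by the odd factor's
// multiplicative inverse modulo 2^32, found with the same Newton iteration as
// BuildSDIVPattern.
#include <cassert>
#include <cstdint>

// Returns X / 6 for any X that is an exact multiple of 6.
static int32_t exactSDivBy6(int32_t X) {
  const uint32_t Divisor = 6;
  const unsigned Shift = __builtin_ctz(Divisor); // 1: the power-of-two part.
  const uint32_t Odd = Divisor >> Shift;         // 3: the odd factor.
  // Newton's method: each step doubles the number of correct low bits.
  uint32_t Factor = Odd;
  while (Odd * Factor != 1)
    Factor *= 2 - Odd * Factor;                  // ends at 0xAAAAAAAB for Odd == 3
  // sra exact by Shift, then multiply by the inverse (unsigned, wrapping).
  return (int32_t)((uint32_t)(X >> Shift) * Factor);
}

int main() {
  for (int32_t X = -6000; X <= 6000; X += 6)
    assert(exactSDivBy6(X) == X / 6);
  return 0;
}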
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N, 0); // Lower SDIV as SDIV
return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT)) {
// Limit this to simple scalars for now.
if (VT.isVector() || !VT.isSimple())
return SDValue();
// If this type will be promoted to a large enough type with a legal
// multiply operation, we can go ahead and do this transform.
if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
return SDValue();
MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (MulVT.getSizeInBits() < (2 * EltBits) ||
!isOperationLegal(ISD::MUL, MulVT))
return SDValue();
}
// If the sdiv has an 'exact' bit we can use a simpler lowering.
if (N->getFlags().hasExact())
return BuildExactSDIV(*this, N, dl, DAG, Created);
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
const APInt &Divisor = C->getAPIntValue();
APInt::ms magics = Divisor.magic();
int NumeratorFactor = 0;
int ShiftMask = -1;
if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
magics.m = 0;
magics.s = 0;
ShiftMask = 0;
} else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
} else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Collect the shifts / magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
if (N1.getOpcode() == ISD::BUILD_VECTOR) {
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
Factor = DAG.getBuildVector(VT, dl, Factors);
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
Shifts.size() == 1 && ShiftMasks.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
Factor = DAG.getSplatVector(VT, dl, Factors[0]);
Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
} else {
assert(isa<ConstantSDNode>(N1) && "Expected a constant");
MagicFactor = MagicFactors[0];
Factor = Factors[0];
Shift = Shifts[0];
ShiftMask = ShiftMasks[0];
}
// Multiply the numerator (operand 0) by the magic value.
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHS = [&](SDValue X, SDValue Y) {
// If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
DAG.getShiftAmountConstant(EltBits, MulVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
SDValue LoHi =
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
return SDValue();
};
SDValue Q = GetMULHS(N0, MagicFactor);
if (!Q)
return SDValue();
Created.push_back(Q.getNode());
// (Optionally) Add/subtract the numerator using Factor.
Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
Created.push_back(Factor.getNode());
Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
Created.push_back(Q.getNode());
// Shift right algebraic by shift value.
Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
Created.push_back(Q.getNode());
// Extract the sign bit, mask it and add it to the quotient.
SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
Created.push_back(T.getNode());
T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
Created.push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
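// A standalone sketch (not part of TargetLowering) of the signed magic-number
// sequence built above, assuming an i32 divide by 7. The constants are the
// standard Hacker's Delight values (magic 0x92492493, shift 2); since the
// divisor is positive and the magic value is negative, the numerator is added
// back, and ShiftMask is all-ones so the sign bit of the quotient is added.
#include <cassert>
#include <cstdint>

static int32_t sdivBy7(int32_t N) {
  const int32_t MagicFactor = (int32_t)0x92492493;          // magics.m
  int32_t Q = (int32_t)(((int64_t)N * MagicFactor) >> 32);  // mulhs N, magic
  Q += N;                                                   // Factor == +1
  Q >>= 2;                                                  // sra by magics.s
  Q += (int32_t)((uint32_t)Q >> 31);                        // add the sign bit
  return Q;
}

int main() {
  for (int32_t N = -1000; N <= 1000; ++N)
    assert(sdivBy7(N) == N / 7);
  return 0;
}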
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
bool IsAfterLegalization,
SmallVectorImpl<SDNode *> &Created) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
unsigned EltBits = VT.getScalarSizeInBits();
EVT MulVT;
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT)) {
// Limit this to simple scalars for now.
if (VT.isVector() || !VT.isSimple())
return SDValue();
// If this type will be promoted to a large enough type with a legal
// multiply operation, we can go ahead and do this transform.
if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
return SDValue();
MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (MulVT.getSizeInBits() < (2 * EltBits) ||
!isOperationLegal(ISD::MUL, MulVT))
return SDValue();
}
bool UseNPQ = false;
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isNullValue())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
APInt::mu magics = Divisor.magicu();
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
if (magics.a != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
magics = Divisor.lshr(PreShift).magicu(PreShift);
assert(magics.a == 0 && "Should use cheap fixup now");
}
APInt Magic = magics.m;
bool SelNPQ;
if (magics.a == 0 || Divisor.isOneValue()) {
assert(magics.s < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
PostShift = magics.s;
SelNPQ = false;
} else {
PostShift = magics.s - 1;
SelNPQ = true;
}
PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
: APInt::getNullValue(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
return true;
};
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Collect the shifts/magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
if (N1.getOpcode() == ISD::BUILD_VECTOR) {
PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
} else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
NPQFactors.size() == 1 && PostShifts.size() == 1 &&
"Expected matchUnaryPredicate to return one for scalable vectors");
PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
} else {
assert(isa<ConstantSDNode>(N1) && "Expected a constant");
PreShift = PreShifts[0];
MagicFactor = MagicFactors[0];
PostShift = PostShifts[0];
}
SDValue Q = N0;
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
Created.push_back(Q.getNode());
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
// If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
DAG.getShiftAmountConstant(EltBits, MulVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
}
if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
SDValue LoHi =
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
return SDValue(); // No mulhu or equivalent
};
// Multiply the numerator (operand 0) by the magic value.
Q = GetMULHU(Q, MagicFactor);
if (!Q)
return SDValue();
Created.push_back(Q.getNode());
if (UseNPQ) {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
Created.push_back(NPQ.getNode());
// For vectors we might have a mix of non-NPQ/NPQ paths, so use
// MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
if (VT.isVector())
NPQ = GetMULHU(NPQ, NPQFactor);
else
NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
Created.push_back(NPQ.getNode());
Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
Created.push_back(Q.getNode());
}
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
Created.push_back(Q.getNode());
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue One = DAG.getConstant(1, dl, VT);
SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
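/// For example (illustrative, with Cn denoting a constant node): given
/// Values = {C5, C0, C5, C0} and Predicate = isNullConstant, every element
/// becomes C5. Given Values = {C5, C0, C7, C0} there is no consistent splat,
/// so the zeros are replaced with AlternativeReplacement if one was provided,
/// and left untouched otherwise.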
static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
std::function<bool(SDValue)> Predicate,
SDValue AlternativeReplacement = SDValue()) {
SDValue Replacement;
// Is there a value for which the Predicate does *NOT* match? What is it?
auto SplatValue = llvm::find_if_not(Values, Predicate);
if (SplatValue != Values.end()) {
// Does Values consist only of SplatValue's and values matching Predicate?
if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
return Value == *SplatValue || Predicate(Value);
})) // Then we shall replace values matching predicate with SplatValue.
Replacement = *SplatValue;
}
if (!Replacement) {
// Oops, we did not find the "baseline" splat value.
if (!AlternativeReplacement)
return; // Nothing to do.
// Let's replace with provided value then.
Replacement = AlternativeReplacement;
}
std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
DCI.AddToWorklist(N);
return Folded;
}
return SDValue();
}
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
// - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
// - Q = floor(((2^W) - 1) / D)
// where W is the width of the common type of N and D.
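// Worked example (illustrative, assuming 32-bit types): for D = 6 we get
// D0 = 3, K = 1, P = 0xAAAAAAAB (the inverse of 3 modulo 2^32) and
// Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so (seteq (urem N, 6), 0) becomes
// (setule (rotr (mul N, 0xAAAAAAAB), 1), 0x2AAAAAAA).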
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
SelectionDAG &DAG = DCI.DAG;
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
bool ComparingWithAllZeros = true;
bool AllComparisonsWithNonZerosAreTautological = true;
bool HadTautologicalLanes = false;
bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
bool HadTautologicalInvertedLanes = false;
SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
if (CDiv->isNullValue())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
ComparingWithAllZeros &= Cmp.isNullValue();
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
// But we will only be able to produce the comparison that will give the
// opposite tautological answer. So this lane would need to be fixed up.
bool TautologicalInvertedLane = D.ule(Cmp);
HadTautologicalInvertedLanes |= TautologicalInvertedLane;
// If all lanes are tautological (either all divisors are ones, or the
// divisor is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological.
if (!Cmp.isNullValue())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
AllDivisorsArePowerOfTwo &= D0.isOneValue();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
if (TautologicalLane) {
// Set the P and K amounts to bogus values so we can try to splat them.
P = 0;
K = -1;
// And ensure that the comparison constant is tautological;
// it will always compare true/false.
Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
SDValue N = REMNode.getOperand(0);
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
// If all lanes are tautological, the result can be constant-folded.
if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a power-of-two, avoid the fold since it can be
// best implemented as a bit test.
if (AllDivisorsArePowerOfTwo)
return SDValue();
SDValue PVal, KVal, QVal;
if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep the '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
// Try to turn KAmts into a splat, since we don't care about the values
// that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
DAG.getConstant(0, DL, ShSVT));
}
PVal = DAG.getBuildVector(VT, DL, PAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
"Expected matchBinaryPredicate to return one element for "
"SPLAT_VECTORs");
PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
PVal = PAmts[0];
KVal = KAmts[0];
QVal = QAmts[0];
}
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
assert(CompTargetNode.getValueType() == N.getValueType() &&
"Expecting that the types on LHS and RHS of comparisons match.");
N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
}
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
// Rotate right only if any divisor was even. We avoid rotates for all-odd
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
// UREM: (rotr (mul N, P), K)
Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
SDValue NewCC =
DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
if (!HadTautologicalInvertedLanes)
return NewCC;
// If any lanes previously compared always-false, the NewCC will give an
// always-true result for them, so we need to fix up those lanes.
// Or the other way around for the inequality predicate.
assert(VT.isVector() && "Can/should only get here for vectors.");
Created.push_back(NewCC.getNode());
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
// But we have produced the comparison that will give the
// opposite tautological answer. So these lanes would need to be fixed up.
SDValue TautologicalInvertedChannels =
DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
Created.push_back(TautologicalInvertedChannels.getNode());
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops – legalization has a hard time producing good code for this.
if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
// If we have a vector select, let's replace the comparison results in the
// affected lanes with the correct tautological result.
SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
DL, SETCCVT, SETCCVT);
return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
Replacement, NewCC);
}
// Else, we can just invert the comparison result in the appropriate lanes.
//
// NOTE: see the note above the VSELECT check.
if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
TautologicalInvertedChannels);
return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
SmallVector<SDNode *, 7> Built;
if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
assert(Built.size() <= 7 && "Max size prediction failed.");
for (SDNode *N : Built)
DCI.AddToWorklist(N);
return Folded;
}
return SDValue();
}
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue CompTargetNode, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// Fold:
// (seteq/ne (srem N, D), 0)
// To:
// (setule/ugt (rotr (add (mul N, P), A), K), Q)
//
// - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
// - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
// - Q = floor((2 * A) / (2^K))
// where W is the width of the common type of N and D.
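// Worked example (illustrative, assuming 32-bit types): for D = 3 we get
// D0 = 3, K = 0, P = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and
// Q = 2 * A = 0x55555554, so (seteq (srem N, 3), 0) becomes
// (setule (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA), 0x55555554), with no rotate
// since K = 0.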
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
SelectionDAG &DAG = DCI.DAG;
EVT VT = REMNode.getValueType();
EVT SVT = VT.getScalarType();
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
EVT ShSVT = ShVT.getScalarType();
// If we are after ops legalization, and MUL is unavailable, we cannot
// proceed.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
if (!CompTarget || !CompTarget->isNullValue())
return SDValue();
bool HadIntMinDivisor = false;
bool HadOneDivisor = false;
bool AllDivisorsAreOnes = true;
bool HadEvenDivisor = false;
bool NeedToApplyOffset = false;
bool AllDivisorsArePowerOfTwo = true;
SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
if (C->isNullValue())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
// WARNING: this fold is only valid for positive divisors!
APInt D = C->getAPIntValue();
if (D.isNegative())
D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
HadOneDivisor |= D.isOneValue();
AllDivisorsAreOnes &= D.isOneValue();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
// D is even if it has trailing zeros; unless it's INT_MIN, in which case
// we don't care about this lane in this fold, we'll special-handle it.
HadEvenDivisor |= (K != 0);
}
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
AllDivisorsArePowerOfTwo &= D0.isOneValue();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
unsigned W = D.getBitWidth();
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
A.clearLowBits(K);
if (!D.isMinSignedValue()) {
// If the divisor is INT_MIN, then we don't care about this lane in this
// fold; we'll special-handle it.
NeedToApplyOffset |= A != 0;
}
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes; those can be set to undef if appropriate.
if (D.isOneValue()) {
// Set P, A and K to bogus values so we can try to splat them.
P = 0;
A = -1;
K = -1;
// x ?% 1 == 0 <--> true <--> x u<= -1
Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
AAmts.push_back(DAG.getConstant(A, DL, SVT));
KAmts.push_back(
DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
QAmts.push_back(DAG.getConstant(Q, DL, SVT));
return true;
};
SDValue N = REMNode.getOperand(0);
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
return SDValue();
// If this is a srem by one, avoid the fold since it can be constant-folded.
if (AllDivisorsAreOnes)
return SDValue();
// If this is a srem by a power-of-two (including INT_MIN), avoid the fold
// since it can be best implemented as a bit test.
if (AllDivisorsArePowerOfTwo)
return SDValue();
SDValue PVal, AVal, KVal, QVal;
if (D.getOpcode() == ISD::BUILD_VECTOR) {
if (HadOneDivisor) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep the '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
// Try to turn AAmts into a splat, since we don't care about the
// values that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
DAG.getConstant(0, DL, SVT));
// Try to turn KAmts into a splat, since we don't care about the values
// that are currently '-1'. If we can't, change them to '0's.
turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
DAG.getConstant(0, DL, ShSVT));
}
PVal = DAG.getBuildVector(VT, DL, PAmts);
AVal = DAG.getBuildVector(VT, DL, AAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
} else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
QAmts.size() == 1 &&
"Expected matchUnaryPredicate to return one element for scalable "
"vectors");
PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
assert(isa<ConstantSDNode>(D) && "Expected a constant");
PVal = PAmts[0];
AVal = AAmts[0];
KVal = KAmts[0];
QVal = QAmts[0];
}
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
if (NeedToApplyOffset) {
// We need ADD to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
return SDValue();
// (add (mul N, P), A)
Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
Created.push_back(Op0.getNode());
}
// Rotate right only if any divisor was even. We avoid rotates for all-odd
// divisors as a performance improvement, since rotating by 0 is a no-op.
if (HadEvenDivisor) {
// We need ROTR to do this.
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
// SREM: (rotr (add (mul N, P), A), K)
Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
Created.push_back(Op0.getNode());
}
// SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
SDValue Fold =
DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
// If we didn't have lanes with INT_MIN divisor, then we're done.
if (!HadIntMinDivisor)
return Fold;
// That fold is only valid for positive divisors, which effectively means
// it is invalid for INT_MIN divisors. So if we have such a lane,
// we must fix up the results for said lanes.
assert(VT.isVector() && "Can/should only get here for vectors.");
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops – legalization has a hard time producing good code for the code that
// follows.
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
!isOperationLegalOrCustom(Cond, VT) ||
!isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
return SDValue();
Created.push_back(Fold.getNode());
SDValue IntMin = DAG.getConstant(
APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
Created.push_back(DivisorIsIntMin.getNode());
// (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
Created.push_back(Masked.getNode());
SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
Created.push_back(MaskedIsZero.getNode());
// To produce the final result we need to blend two vectors: 'Fold' and
// 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
// from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
// constant-folded, the select can get lowered to a shuffle with a constant
// mask.
SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
MaskedIsZero, Fold);
return Blended;
}
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
DAG.getContext()->emitError("argument to '__builtin_return_address' must "
"be a constant integer");
return true;
}
return false;
}
SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
// Test for denormal inputs to avoid producing a wrong estimate.
if (Mode.Input == DenormalMode::IEEE) {
// This is specifically a check for the handling of denormal inputs,
// not the result.
// Test = fabs(X) < SmallestNormal
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
}
// Test = X == 0.0
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOps, bool OptForSize,
NegatibleCost &Cost,
unsigned Depth) const {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) {
Cost = NegatibleCost::Cheaper;
return Op.getOperand(0);
}
// Don't recurse exponentially.
if (Depth > SelectionDAG::MaxRecursionDepth)
return SDValue();
// Pre-increment recursion depth for use in recursive calls.
++Depth;
const SDNodeFlags Flags = Op->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = Op.getValueType();
unsigned Opcode = Op.getOpcode();
// Don't allow anything with multiple uses unless we know it is free.
if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
isFPExtFree(VT, Op.getOperand(0).getValueType());
if (!IsFreeExtend)
return SDValue();
}
auto RemoveDeadNode = [&](SDValue N) {
if (N && N.getNode()->use_empty())
DAG.RemoveDeadNode(N.getNode());
};
SDLoc DL(Op);
// Because getNegatedExpression can delete nodes we need a handle to keep
// temporary nodes alive in case the recursion manages to create an identical
// node.
std::list<HandleSDNode> Handles;
switch (Opcode) {
case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
bool IsOpLegal =
isOperationLegal(ISD::ConstantFP, VT) ||
isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
OptForSize);
if (LegalOps && !IsOpLegal)
break;
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
SDValue CFP = DAG.getConstantFP(V, DL, VT);
// If we already have the use of the negated floating constant, it is free
// to negate it even if it has multiple uses.
if (!Op.hasOneUse() && CFP.use_empty())
break;
Cost = NegatibleCost::Neutral;
return CFP;
}
case ISD::BUILD_VECTOR: {
// Only permit BUILD_VECTOR of constants.
if (llvm::any_of(Op->op_values(), [&](SDValue N) {
return !N.isUndef() && !isa<ConstantFPSDNode>(N);
}))
break;
bool IsOpLegal =
(isOperationLegal(ISD::ConstantFP, VT) &&
isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
llvm::all_of(Op->op_values(), [&](SDValue N) {
return N.isUndef() ||
isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
OptForSize);
});
if (LegalOps && !IsOpLegal)
break;
SmallVector<SDValue, 4> Ops;
for (SDValue C : Op->op_values()) {
if (C.isUndef()) {
Ops.push_back(C);
continue;
}
APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
V.changeSign();
Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
}
Cost = NegatibleCost::Neutral;
return DAG.getBuildVector(VT, DL, Ops);
}
case ISD::FADD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FSUB: {
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fsub 0, Y)) -> Y
if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
if (C->isZero()) {
Cost = NegatibleCost::Cheaper;
return Y;
}
// fold (fneg (fsub X, Y)) -> (fsub Y, X)
Cost = NegatibleCost::Neutral;
return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
}
case ISD::FMUL:
case ISD::FDIV: {
SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = CostX;
SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Ignore X * 2.0 because that is expected to be canonicalized to X + X.
if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
break;
// Negate the Y if it is not expensive.
if (NegY) {
Cost = CostY;
SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FMA:
case ISD::FMAD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
break;
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
NegatibleCost CostZ = NegatibleCost::Expensive;
SDValue NegZ =
getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
// Give up if we fail to negate Z.
if (!NegZ)
break;
// Prevent this node from being deleted by the next two calls.
Handles.emplace_back(NegZ);
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
NegatibleCost CostX = NegatibleCost::Expensive;
SDValue NegX =
getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// Prevent this node from being deleted by the next call.
if (NegX)
Handles.emplace_back(NegX);
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
NegatibleCost CostY = NegatibleCost::Expensive;
SDValue NegY =
getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
// We're done with the handles.
Handles.clear();
// Negate the X if its cost is less than or equal to the cost of Y.
if (NegX && (CostX <= CostY)) {
Cost = std::min(CostX, CostZ);
SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
if (NegY != N)
RemoveDeadNode(NegY);
return N;
}
// Negate the Y if it is not expensive.
if (NegY) {
Cost = std::min(CostY, CostZ);
SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
if (NegX != N)
RemoveDeadNode(NegX);
return N;
}
break;
}
case ISD::FP_EXTEND:
case ISD::FSIN:
if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
OptForSize, Cost, Depth))
return DAG.getNode(Opcode, DL, VT, NegV);
break;
case ISD::FP_ROUND:
if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
OptForSize, Cost, Depth))
return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
break;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
SDValue LHS, SDValue RHS,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
MulExpansionKind Kind, SDValue LL,
SDValue LH, SDValue RL, SDValue RH) const {
assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
Opcode == ISD::SMUL_LOHI);
bool HasMULHS = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
bool HasMULHU = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
return false;
unsigned OuterBitSize = VT.getScalarSizeInBits();
unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
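// In outline, the expansion below follows the schoolbook decomposition:
// writing b = 2^InnerBitSize, LHS = LH * b + LL and RHS = RH * b + RL,
//   LHS * RHS = LL*RL + (LL*RH + LH*RL) * b + LH*RH * b^2,
// which is computed with half-width multiplies plus carry propagation.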
// LL, LH, RL, and RH must be either all NULL or all set to a value.
assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
(!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
bool Signed) -> bool {
if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
Hi = SDValue(Lo.getNode(), 1);
return true;
}
if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
return true;
}
return false;
};
SDValue Lo, Hi;
if (!LL.getNode() && !RL.getNode() &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
}
if (!LL.getNode())
return false;
APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
if (DAG.MaskedValueIsZero(LHS, HighMask) &&
DAG.MaskedValueIsZero(RHS, HighMask)) {
// The inputs are both zero-extended.
if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
Result.push_back(Lo);
Result.push_back(Hi);
if (Opcode != ISD::MUL) {
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
Result.push_back(Zero);
Result.push_back(Zero);
}
return true;
}
}
if (!VT.isVector() && Opcode == ISD::MUL &&
DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
Result.push_back(Lo);
Result.push_back(Hi);
return true;
}
}
unsigned ShiftAmount = OuterBitSize - InnerBitSize;
EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
// FIXME getShiftAmountTy does not always return a sensible result when VT
// is an illegal type, and so the type may be too small to fit the shift
// amount. Override it with i32. The shift will have to be legalized.
ShiftAmountTy = MVT::i32;
}
SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
if (!LH.getNode() && !RH.getNode() &&
isOperationLegalOrCustom(ISD::SRL, VT) &&
isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
}
if (!LH.getNode())
return false;
if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
return false;
Result.push_back(Lo);
if (Opcode == ISD::MUL) {
RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
Result.push_back(Hi);
return true;
}
// Compute the full width result.
auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
};
SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
return false;
// This is effectively the add part of a multiply-add of half-sized operands,
// so it cannot overflow.
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
return false;
SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
isOperationLegalOrCustom(ISD::ADDE, VT));
if (UseGlue)
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
Merge(Lo, Hi));
else
Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
return false;
if (UseGlue)
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
Carry);
else
Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
Zero, Carry);
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
if (Opcode == ISD::SMUL_LOHI) {
SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
}
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
return true;
}
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SelectionDAG &DAG, MulExpansionKind Kind,
SDValue LL, SDValue LH, SDValue RL,
SDValue RH) const {
SmallVector<SDValue, 2> Result;
bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
N->getOperand(0), N->getOperand(1), Result, HiLoVT,
DAG, Kind, LL, LH, RL, RH);
if (Ok) {
assert(Result.size() == 2);
Lo = Result[0];
Hi = Result[1];
}
return Ok;
}
// Check that (every element of) Z is undef or not an exact multiple of BW.
static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
Z,
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
true);
}
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
SDValue Z = Node->getOperand(2);
unsigned BW = VT.getScalarSizeInBits();
bool IsFSHL = Node->getOpcode() == ISD::FSHL;
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Z.getValueType();
// If a funnel shift in the other direction is better supported, use it.
unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl X, Y, Z -> fshr X, Y, -Z
// fshr X, Y, Z -> fshl X, Y, -Z
SDValue Zero = DAG.getConstant(0, DL, ShVT);
Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
} else {
// fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
// fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
SDValue One = DAG.getConstant(1, DL, ShVT);
if (IsFSHL) {
Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
X = DAG.getNode(ISD::SRL, DL, VT, X, One);
} else {
X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
}
Z = DAG.getNOT(DL, Z, ShVT);
}
Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
return true;
}
SDValue ShX, ShY;
SDValue ShAmt, InvShAmt;
if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl: X << C | Y >> (BW - C)
// fshr: X << (BW - C) | Y >> C
// where C = Z % BW is not zero
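// For example (illustrative, BW = 8): fshl(0xAB, 0xCD, 4) is the high byte
// of (0xABCD << 4), i.e. 0xBC, and indeed
// ((0xAB << 4) | (0xCD >> 4)) & 0xFF == 0xBC.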
SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
} else {
// fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
// fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
if (isPowerOf2_32(BW)) {
// Z % BW -> Z & (BW - 1)
ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
// (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
} else {
SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
}
SDValue One = DAG.getConstant(1, DL, ShVT);
if (IsFSHL) {
ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
} else {
SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
}
}
Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
return true;
}
// TODO: Merge with expandFunnelShift.
bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
SDValue &Result, SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
// If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
if (!AllowVectorOps && VT.isVector() &&
(!isOperationLegalOrCustom(ISD::SHL, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
SDValue ShVal;
SDValue HsVal;
if (isPowerOf2_32(EltSizeInBits)) {
// (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
// (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
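// For example (illustrative, w = 8): rotl(0xB4, 3) =
// ((0xB4 << 3) | (0xB4 >> 5)) & 0xFF = 0xA0 | 0x05 = 0xA5.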
SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
} else {
// (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
// (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
SDValue One = DAG.getConstant(1, DL, ShVT);
HsVal =
DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
}
Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
return true;
}
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
SelectionDAG &DAG) const {
assert(Node->getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Node->getValueType(0);
unsigned VTBits = VT.getScalarSizeInBits();
assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Node->getOperand(0);
SDValue ShOpHi = Node->getOperand(1);
SDValue ShAmt = Node->getOperand(2);
EVT ShAmtVT = ShAmt.getValueType();
EVT ShAmtCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
SDLoc dl(Node);
// ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
// ISD::SRL/SRA nodes do not. Insert an AND to be safe; it's usually optimized
// away during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
DAG.getConstant(VTBits - 1, dl, ShAmtVT));
SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, dl, ShAmtVT))
: DAG.getConstant(0, dl, VT);
SDValue Tmp2, Tmp3;
if (IsSHL) {
Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
// If the shift amount is larger than or equal to the width of a part, we
// don't use the result from the FSHL/FSHR. Insert a test and select the
// appropriate values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
DAG.getConstant(VTBits, dl, ShAmtVT));
SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
if (IsSHL) {
Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
} else {
Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
}
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
// FIXME: Only f32 to i64 conversions are supported.
if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
if (Node->isStrictFPOpcode())
// When a NaN is converted to an integer a trap is allowed. We can't
// use this expansion here because it would eliminate that trap. Other
// traps are also allowed and cannot be eliminated. See
// IEEE 754-2008 sec 5.8.
return false;
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
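// Worked example (illustrative): for Src = 5.5f, Bits = 0x40B00000, so
// ExponentBits = 129, Exponent = 2 and R = mantissa | implicit bit =
// 0x00B00000; since Exponent <= ExponentLoBit the select below takes the
// SRL arm, R >> (23 - 2) = 5, and with Sign = 0 the final result is 5.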
unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
EVT IntVT = SrcVT.changeTypeToInteger();
EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
SDValue ExponentBits = DAG.getNode(
ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
DAG.getConstant(0x00800000, dl, IntVT));
R = DAG.getZExtOrTrunc(R, dl, DstVT);
R = DAG.getSelectCC(
dl, Exponent, ExponentLoBit,
DAG.getNode(ISD::SHL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
dl, IntShVT)),
DAG.getNode(ISD::SRL, dl, DstVT, R,
DAG.getZExtOrTrunc(
DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
dl, IntShVT)),
ISD::SETGT);
SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
return true;
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
SDLoc dl(SDValue(Node, 0));
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
EVT DstSetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
ISD::FP_TO_SINT;
if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
return false;
// If the maximum float value is smaller than the signed integer range,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
if (Node->isStrictFPOpcode()) {
Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Node->getOperand(0), Src });
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
// Don't expand it if there isn't a cheap FSUB instruction.
if (!isOperationLegalOrCustom(
Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
return false;
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel;
if (Node->isStrictFPOpcode()) {
Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
Node->getOperand(0), /*IsSignaling*/ true);
Chain = Sel.getValue(1);
} else {
Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
}
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
if (Strict) {
// Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
// signmask, then offset it (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
// FltOfs = select Sel, 0, 0x8000000000000000
// IntOfs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
// TODO: Should any fast-math-flags be set for the FSUB?
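// Worked example (illustrative, f32 -> i32): for Src = 3.0e9f, Sel is false,
// so FltOfs = 2^31 and IntOfs = 0x80000000; fp_to_sint(3.0e9 - 2^31) gives
// 852516352, and XOR-ing with 0x80000000 yields 3000000000 as expected.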
SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
DAG.getConstantFP(0.0, dl, SrcVT), Cst);
Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
{ Chain, Src, FltOfs });
SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Val.getValue(1), Val });
Chain = SInt.getValue(1);
} else {
SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
}
Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
// Result = select (Src < 0x8000000000000000), True, False
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
// TODO: Should any fast-math-flags be set for the FSUB?
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
// This transform is not correct for converting 0 when the rounding mode is
// set to round toward negative infinity, which will produce -0.0. So disable
// it under strictfp.
if (Node->isStrictFPOpcode())
return false;
SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
return false;
// Only expand vector types if we have the appropriate vector bit operations.
if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
!isOperationLegalOrCustom(ISD::FADD, DstVT) ||
!isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
return false;
SDLoc dl(SDValue(Node, 0));
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler-rt. This implementation performs rounding
// correctly in all rounding modes with the exception of converting 0
// when rounding toward negative infinity. In that case the fsub will produce
// -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
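// In brief (illustrative): with Src = Hi * 2^32 + Lo, bitcasting
// (0x4330000000000000 | Lo) gives the double 2^52 + Lo, and bitcasting
// (0x4530000000000000 | Hi) gives 2^84 + Hi * 2^32; subtracting the constant
// 2^84 + 2^52 from the latter and adding the former reconstructs
// Hi * 2^32 + Lo, with the final fadd doing the rounding.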
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
SDValue HiSub =
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
SelectionDAG &DAG) const {
SDLoc dl(Node);
unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
EVT VT = Node->getValueType(0);
if (VT.isScalableVector())
report_fatal_error(
"Expanding fminnum/fmaxnum for scalable vectors is undefined.");
if (isOperationLegalOrCustom(NewOp, VT)) {
SDValue Quiet0 = Node->getOperand(0);
SDValue Quiet1 = Node->getOperand(1);
if (!Node->getFlags().hasNoNaNs()) {
// Insert canonicalizes if we might need to quiet the operands to get
// correct sNaN behavior.
if (!DAG.isKnownNeverSNaN(Quiet0)) {
Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
Node->getFlags());
}
if (!DAG.isKnownNeverSNaN(Quiet1)) {
Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
Node->getFlags());
}
}
return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
}
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
// instead if there are no NaNs.
if (Node->getFlags().hasNoNaNs()) {
unsigned IEEE2018Op =
Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
Node->getOperand(1), Node->getFlags());
}
}
// If none of the above worked, but there are no NaNs, then expand to
// a compare/select sequence. This is required for correctness since
// InstCombine might have canonicalized a fcmp+select sequence to a
// FMINNUM/FMAXNUM node. If we were to fall through to the default
// expansion to libcall, we might introduce a link-time dependency
// on libm into a file that originally did not have one.
if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred =
Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
// Copy FMF flags, but always set the no-signed-zeros flag
// as this is implied by the FMINNUM/FMAXNUM semantics.
SDNodeFlags Flags = Node->getFlags();
Flags.setNoSignedZeros(true);
SelCC->setFlags(Flags);
return SelCC;
}
return SDValue();
}
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = Node->getOperand(0);
unsigned Len = VT.getScalarSizeInBits();
assert(VT.isInteger() && "CTPOP not implemented for this type.");
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
return false;
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
(Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
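// Worked example (illustrative, Len = 8, v = 0x6E): the three steps below
// produce 0x59 (per-pair counts), 0x23 (per-nibble counts) and finally 0x05,
// which is popcount(0x6E); for Len == 8 the multiply/shift step is skipped.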
SDValue Mask55 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
SDValue Mask33 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
SDValue Mask0F =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
SDValue Mask01 =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(1, dl, ShVT)),
Mask55));
// v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(2, dl, ShVT)),
Mask33));
// v = (v + (v >> 4)) & 0x0F0F0F0F...
Op = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::ADD, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(4, dl, ShVT))),
Mask0F);
// v = (v * 0x01010101...) >> (Len - 8)
if (Len > 8)
Op =
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
Result = Op;
return true;
}
bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = Node->getOperand(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
isOperationLegalOrCustom(ISD::CTLZ, VT)) {
Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
return true;
}
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
!isOperationLegalOrCustom(ISD::CTPOP, VT) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
// for now, we do this:
// x = x | (x >> 1);
// x = x | (x >> 2);
// ...
// x = x | (x >>16);
// x = x | (x >>32); // for 64-bit input
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
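// For example, for an 8-bit x = 0b00010100 (three leading zeros):
//   after x |= x >> 1  ->  0b00011110
//   after x |= x >> 2  ->  0b00011111
//   after x |= x >> 4  ->  0b00011111
//   popcount(~x) = popcount(0b11100000) = 3.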
for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
return true;
}
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
isOperationLegalOrCustom(ISD::CTTZ, VT)) {
Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
return true;
}
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
!isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return bitwidth - nlz(~x & (x - 1)); }
// Ref: "Hacker's Delight" by Henry Warren
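// For example, for an 8-bit x = 0b10110100 (two trailing zeros):
//   x - 1      = 0b10110011
//   ~x         = 0b01001011
//   ~x & (x-1) = 0b00000011, whose popcount is 2.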
SDValue Tmp = DAG.getNode(
ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
Result =
DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
return true;
}
Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
return true;
}
bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
SelectionDAG &DAG, bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = N->getOperand(0);
// abs(x) -> smax(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
return true;
}
// Only expand vector types if we have the appropriate vector operations.
if (VT.isVector() &&
(!isOperationLegalOrCustom(ISD::SRA, VT) ||
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
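// Fall back to the classic sign-mask expansion: Shift = X >> (BitWidth - 1)
// is 0 for non-negative X and all-ones for negative X, so (X + Shift) ^ Shift
// computes |X|. E.g. for 8-bit X = -5 (0xFB): Shift = 0xFF, X + Shift = 0xFA,
// and 0xFA ^ 0xFF = 0x05.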
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
} else {
// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
return true;
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
if (!VT.isSimple())
return SDValue();
EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default:
return SDValue();
case MVT::i16:
// Use a rotate by 8. This can be further expanded if necessary.
return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(0xFF0000, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
case MVT::i64:
Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
DAG.getConstant(255ULL<<48, dl, VT));
Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
DAG.getConstant(255ULL<<40, dl, VT));
Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
DAG.getConstant(255ULL<<32, dl, VT));
Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
DAG.getConstant(255ULL<<24, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
DAG.getConstant(255ULL<<16, dl, VT));
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
DAG.getConstant(255ULL<<8 , dl, VT));
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
}
}
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Sz = VT.getScalarSizeInBits();
SDValue Tmp, Tmp2, Tmp3;
// If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
// i2 pairs, and finally the i1 pairs.
// TODO: We can easily support i4/i2 legal types if any target ever does.
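// For example, reversing the 8-bit value 0xB1 (0b10110001) proceeds as
//   swap i4: 0xB1 -> 0x1B
//   swap i2: 0x1B -> 0x4E
//   swap i1: 0x4E -> 0x8D (0b10001101), the bit-reversed result.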
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
// swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
// swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
}
Tmp = DAG.getConstant(0, dl, VT);
for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
if (I < J)
Tmp2 =
DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
else
Tmp2 =
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
APInt Shift(Sz, 1);
Shift <<= J;
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
}
return Tmp;
}
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (SrcVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector loads");
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
EVT DstEltVT = DstVT.getScalarType();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements.
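// For example, a v4i2 load is done as a single i8 integer load, and element
// Idx is then recovered as trunc((Load >> (Idx * 2)) & 0x3) on a
// little-endian target (the shift index is mirrored on big-endian).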
if (!SrcEltVT.isByteSized()) {
unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
unsigned NumSrcBits = SrcVT.getSizeInBits();
EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
SDValue SrcEltBitMask = DAG.getConstant(
APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
// Load the whole vector and avoid masking off the top bits as it makes
// the codegen worse.
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SmallVector<SDValue, 8> Vals;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
SDValue ShiftAmount =
DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
LoadVT, SL, /*LegalTypes=*/false);
SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
SDValue Elt =
DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
if (ExtType != ISD::NON_EXTLOAD) {
unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
}
Vals.push_back(Scalar);
}
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, Load.getValue(1));
}
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue ScalarLoad =
DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Idx * Stride),
SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, NewChain);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
SDLoc SL(ST);
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
if (StVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector stores");
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
unsigned NumElem = StVT.getVectorNumElements();
// A vector must always be stored in memory as-is, i.e. without any padding
// between the elements, since various code depends on it, e.g. in the
// handling of a bitcast of a vector type to int, which may be done with a
// vector store followed by an integer load. A vector that does not have
// elements that are byte-sized must therefore be stored as an integer
// built out of the extracted vector elements.
if (!MemSclVT.isByteSized()) {
unsigned NumBits = StVT.getSizeInBits();
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
unsigned ShiftIntoIdx =
(DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
SDValue ShiftAmount =
DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
SDValue ShiftedElt =
DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
}
return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
}
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr =
DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
Stores.push_back(Store);
}
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
assert(LD->getAddressingMode() == ISD::UNINDEXED &&
"unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
EVT LoadedVT = LD->getMemoryVT();
SDLoc dl(LD);
auto &MF = DAG.getMachineFunction();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
return scalarizeVectorLoad(LD, DAG);
}
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (LoadedVT != VT)
Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
ISD::ANY_EXTEND, dl, VT, Result);
return std::make_pair(Result, newLoad.getValue(1));
}
// Copy the value to an (aligned) stack slot using (unaligned) integer
// loads and stores, then do a (aligned) load from the stack slot.
MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
unsigned LoadedBytes = LoadedVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
EVT PtrVT = Ptr.getValueType();
EVT StackPtrVT = StackPtr.getValueType();
SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
// Do all but one copy using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the original location.
SDValue Load = DAG.getLoad(
RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(
Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
// Increment the pointers.
Offset += RegBytes;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
}
// The last copy may be partial. Do an extending load.
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
8 * (LoadedBytes - Offset));
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset), MemVT,
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
Stores.push_back(DAG.getTruncStore(
Load.getValue(1), dl, Load, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
LoadedVT);
// Callers expect a MERGE_VALUES node.
return std::make_pair(Load, TF);
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
// Compute the new VT that is half the size of the old one. This is an
// integer MVT.
unsigned NumBits = LoadedVT.getSizeInBits();
EVT NewLoadedVT;
NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
NumBits >>= 1;
Align Alignment = LD->getOriginalAlign();
unsigned IncrementSize = NumBits / 8;
ISD::LoadExtType HiExtType = LD->getExtensionType();
// If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
if (HiExtType == ISD::NON_EXTLOAD)
HiExtType = ISD::ZEXTLOAD;
// Load the value in two parts
SDValue Lo, Hi;
if (DAG.getDataLayout().isLittleEndian()) {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
}
// Aggregate the two parts.
SDValue ShiftAmount =
DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
DAG.getDataLayout()));
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
return std::make_pair(Result, TF);
}
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
assert(ST->getAddressingMode() == ISD::UNINDEXED &&
"unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
Align Alignment = ST->getOriginalAlign();
auto &MF = DAG.getMachineFunction();
EVT StoreMemVT = ST->getMemoryVT();
SDLoc dl(ST);
if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
StoreMemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
return Result;
}
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
Alignment, ST->getMemOperand()->getFlags());
return Result;
}
// Do an (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
MVT RegVT = getRegisterType(
*DAG.getContext(),
EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
unsigned StoredBytes = StoreMemVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(
Chain, dl, Val, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
EVT StackPtrVT = StackPtr.getValueType();
SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
// Do all but one copy using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(
RegVT, dl, Store, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
ST->getOriginalAlign(),
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
}
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
EVT LoadMemVT =
EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(
ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
ST->getOriginalAlign(),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
return Result;
}
assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
"Unaligned store of unknown type.");
// Get the half-size VT
EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
unsigned NumBits = NewStoredVT.getFixedSizeInBits();
unsigned IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
SDValue ShiftAmount = DAG.getConstant(
NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl,
DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
SDValue Result =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
return Result;
}
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
const SDLoc &DL, EVT DataVT,
SelectionDAG &DAG,
bool IsCompressedMemory) const {
SDValue Increment;
EVT AddrVT = Addr.getValueType();
EVT MaskVT = Mask.getValueType();
assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
"Incompatible types of Data and Mask");
if (IsCompressedMemory) {
if (DataVT.isScalableVector())
report_fatal_error(
"Cannot currently handle compressed memory with scalable vectors");
// Increment the pointer according to the number of '1's in the mask.
EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
if (MaskIntVT.getSizeInBits() < 32) {
MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
MaskIntVT = MVT::i32;
}
// Count '1's with POPCNT.
Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
// Scale is an element size in bytes.
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
} else if (DataVT.isScalableVector()) {
Increment = DAG.getVScale(DL, AddrVT,
APInt(AddrVT.getFixedSizeInBits(),
DataVT.getStoreSize().getKnownMinSize()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
unsigned NumSubElts) {
if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;
EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
if (VecVT.isScalableVector()) {
// If this is a constant index and we know that the value plus the number of
// elements in the subvector minus one is less than the minimum number of
// elements, then it's safe to return Idx.
if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
return Idx;
SDValue VS =
DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
DAG.getConstant(NumSubElts, dl, IdxVT));
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
}
if (isPowerOf2_32(NElts) && NumSubElts == 1) {
APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
DAG.getConstant(Imm, dl, IdxVT));
}
unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
DAG.getConstant(MaxIndex, dl, IdxVT));
}
SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
SDValue Index) const {
return getVectorSubVecPointer(
DAG, VecPtr, VecVT,
EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
Index);
}
SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
EVT SubVecVT,
SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
EVT EltVT = VecVT.getVectorElementType();
// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
// Scalable vectors don't need clamping, as these are checked at compile time.
if (SubVecVT.isFixedLengthVector()) {
assert(SubVecVT.getVectorElementType() == EltVT &&
"Sub-vector must be a fixed vector with matching element type");
Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
SubVecVT.getVectorNumElements());
}
EVT IdxVT = Index.getValueType();
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {
// Access to the address of TLS variable xyz is lowered to a function call:
// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
EVT PtrVT = getPointerTy(DAG.getDataLayout());
PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
SDLoc dl(GA);
ArgListTy Args;
ArgListEntry Entry;
std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
StringRef EmuTlsVarName(NameString);
GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
assert(EmuTlsVar && "Cannot find EmuTlsVar ");
Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
Entry.Ty = VoidPtrType;
Args.push_back(Entry);
SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as a call. Inform MFI that the function has
// calls. At least for X86 targets; maybe good for other targets too?
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true); // Is this only for X86 target?
MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
"Emulated TLS must have zero offset in GlobalAddressSDNode");
return CallResult.first;
}
SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
SelectionDAG &DAG) const {
assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
if (!isCtlzFast())
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isNullValue() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
VT = MVT::i32;
Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
}
unsigned Log2b = Log2_32(VT.getSizeInBits());
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
}
return SDValue();
}
// Convert redundant addressing modes (e.g. scaling is redundant
// when accessing bytes).
ISD::MemIndexType
TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
SDValue Offsets) const {
bool IsScaledIndex =
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
bool IsSignedIndex =
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
IsScaledIndex = false;
IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
}
return IndexType;
}
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
EVT VT = Op0.getValueType();
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
// umin(x,y) -> sub(x,usubsat(x,y))
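// E.g. umin(3, 7) = 3 - usubsat(3, 7) = 3 - 0 = 3, and
// umin(7, 3) = 7 - usubsat(7, 3) = 7 - 4 = 3.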
if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::SUB, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
}
// umax(x,y) -> add(x,usubsat(y,x))
if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::ADD, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
}
// Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
ISD::CondCode CC;
switch (Opcode) {
default: llvm_unreachable("How did we get here?");
case ISD::SMAX: CC = ISD::SETGT; break;
case ISD::SMIN: CC = ISD::SETLT; break;
case ISD::UMAX: CC = ISD::SETUGT; break;
case ISD::UMIN: CC = ISD::SETULT; break;
}
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
SDLoc dl(Node);
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
// usub.sat(a, b) -> umax(a, b) - b
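// E.g. usub.sat(3, 7) = umax(3, 7) - 7 = 0 and usub.sat(9, 7) = 9 - 7 = 2.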
if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
// uadd.sat(a, b) -> umin(a, ~b) + b
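// E.g. for i8, uadd.sat(200, 100) = umin(200, ~100) + 100
//                                 = umin(200, 155) + 100 = 255 (saturated).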
if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
}
unsigned OverflowOp;
switch (Opcode) {
case ISD::SADDSAT:
OverflowOp = ISD::SADDO;
break;
case ISD::UADDSAT:
OverflowOp = ISD::UADDO;
break;
case ISD::SSUBSAT:
OverflowOp = ISD::SSUBO;
break;
case ISD::USUBSAT:
OverflowOp = ISD::USUBO;
break;
default:
llvm_unreachable("Expected method to receive signed or unsigned saturation "
"addition or subtraction node.");
}
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
unsigned BitWidth = LHS.getScalarValueSizeInBits();
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
if (Opcode == ISD::UADDSAT) {
if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// (LHS + RHS) | OverflowMask
SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
}
// Overflow ? 0xffff.... : (LHS + RHS)
return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
}
if (Opcode == ISD::USUBSAT) {
if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
// (LHS - RHS) & ~OverflowMask
SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
}
// Overflow ? 0 : (LHS - RHS)
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
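// E.g. for i8, 100 + 100 overflows and wraps to SumDiff = -56 (< 0), so the
// saturated result is SatMax = 127; (-100) + (-100) wraps to +56 (>= 0), so
// the result is SatMin = -128.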
APInt MinVal = APInt::getSignedMinValue(BitWidth);
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
bool IsSigned = Opcode == ISD::SSHLSAT;
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
SDLoc dl(Node);
assert((Node->getOpcode() == ISD::SSHLSAT ||
Node->getOpcode() == ISD::USHLSAT) &&
"Expected a SHLSAT opcode");
assert(VT == RHS.getValueType() && "Expected operands to be the same type");
assert(VT.isInteger() && "Expected operands to be integers");
// If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
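// E.g. for i8, ushl.sat(0x40, 2): 0x40 << 2 wraps to 0x00, and 0x00 >> 2 is
// 0x00 != 0x40, so the result saturates to 0xFF.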
unsigned BW = VT.getScalarSizeInBits();
SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
SDValue Orig =
DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
SDValue SatVal;
if (IsSigned) {
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
SatMin, SatMax, ISD::SETLT);
} else {
SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
}
Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
return Result;
}
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::UMULFIX ||
Node->getOpcode() == ISD::SMULFIXSAT ||
Node->getOpcode() == ISD::UMULFIXSAT) &&
"Expected a fixed point multiplication opcode");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
Node->getOpcode() == ISD::UMULFIXSAT);
bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::SMULFIXSAT);
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned VTSize = VT.getScalarSizeInBits();
if (!Scale) {
// [us]mul.fix(a, b, 0) -> mul(a, b)
if (!Saturating) {
if (isOperationLegalOrCustom(ISD::MUL, VT))
return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
SDValue Zero = DAG.getConstant(0, dl, VT);
APInt MinVal = APInt::getSignedMinValue(VTSize);
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected both operands to be the same type");
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
if (isOperationLegalOrCustom(LoHiOp, VT)) {
SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
Hi = Result.getValue(1);
} else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
} else if (VT.isVector()) {
return SDValue();
} else {
report_fatal_error("Unable to expand fixed point multiplication.");
}
if (Scale == VTSize)
// Result is just the top half since we'd be shifting by the width of the
// operand. Overflow impossible so this works for both UMULFIX and
// UMULFIXSAT.
return Hi;
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
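// E.g. for an 8-bit type with Scale = 4 (Q4.4), 1.5 * 2.5 is 0x18 * 0x28;
// the full 16-bit product is 0x03C0, and funnel-shifting {Hi = 0x03,
// Lo = 0xC0} right by 4 yields 0x3C, i.e. 3.75 in Q4.4.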
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
DAG.getConstant(Scale, dl, ShiftTy));
if (!Saturating)
return Result;
if (!Signed) {
// Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
// widened multiplication) aren't all zeroes.
// Saturate to max if ((Hi >> Scale) != 0),
// which is the same as if (Hi > ((1 << Scale) - 1))
APInt MaxVal = APInt::getMaxValue(VTSize);
SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, LowMask,
DAG.getConstant(MaxVal, dl, VT), Result,
ISD::SETUGT);
return Result;
}
// Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
// widened multiplication) aren't all ones or all zeroes.
SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
if (Scale == 0) {
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
DAG.getConstant(VTSize - 1, dl, ShiftTy));
SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
// Saturate to SatMin if the wide product is negative, and to SatMax if the
// wide product is positive ...
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
ISD::SETLT);
// ... but only if we overflowed.
return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
}
// We handled Scale==0 above, so all the bits to examine are in Hi.
// Saturate to max if ((Hi >> (Scale - 1)) > 0),
// which is the same as if (Hi > (1 << (Scale - 1)) - 1)
SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
// Saturate to min if ((Hi >> (Scale - 1)) < -1),
// which is the same as if (Hi < (-1 << (Scale - 1)))
SDValue HighMask =
DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
dl, VT);
Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
return Result;
}
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
SDValue LHS, SDValue RHS,
unsigned Scale, SelectionDAG &DAG) const {
assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
"Expected a fixed point division opcode");
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// If there is enough room in the type to upscale the LHS or downscale the
// RHS before the division, we can perform it in this type without having to
// resize. For signed operations, the LHS headroom is the number of
// redundant sign bits, and for unsigned ones it is the number of zeroes.
// The headroom for the RHS is the number of trailing zeroes.
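// E.g. for an 8-bit udiv.fix with Scale = 4 (Q4.4), 3.0 / 1.5 is 0x30 / 0x18.
// Here LHSLead = 2 and RHSTrail = 3, so we can shift the LHS up by 2 and the
// RHS down by 2: 0xC0 / 0x06 = 0x20, i.e. 2.0 in Q4.4.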
unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
// For signed saturating operations, we need to be able to detect true integer
// division overflow; that is, when you have MIN / -EPS. However, this
// is undefined behavior and if we emit divisions that could take such
// values it may cause undesired behavior (arithmetic exceptions on x86, for
// example).
// Avoid this by requiring an extra bit so that we never get this case.
// FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
// signed saturating division, we need to emit a whopping 32-bit division.
if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
return SDValue();
unsigned LHSShift = std::min(LHSLead, Scale);
unsigned RHSShift = Scale - LHSShift;
// At this point, we know that if we shift the LHS up by LHSShift and the
// RHS down by RHSShift, we can emit a regular division with a final scaling
// factor of Scale.
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
if (LHSShift)
LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
DAG.getConstant(LHSShift, dl, ShiftTy));
if (RHSShift)
RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
DAG.getConstant(RHSShift, dl, ShiftTy));
SDValue Quot;
if (Signed) {
// For signed operations, if the resulting quotient is negative and the
// remainder is nonzero, subtract 1 from the quotient to round towards
// negative infinity.
SDValue Rem;
// FIXME: Ideally we would always produce an SDIVREM here, but if the
// type isn't legal, SDIVREM cannot be expanded. There is no reason why
// we couldn't just form a libcall, but the type legalizer doesn't do it.
if (isTypeLegal(VT) &&
isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
Quot = DAG.getNode(ISD::SDIVREM, dl,
DAG.getVTList(VT, VT),
LHS, RHS);
Rem = Quot.getValue(1);
Quot = Quot.getValue(0);
} else {
Quot = DAG.getNode(ISD::SDIV, dl, VT,
LHS, RHS);
Rem = DAG.getNode(ISD::SREM, dl, VT,
LHS, RHS);
}
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
DAG.getConstant(1, dl, VT));
Quot = DAG.getSelect(dl, VT,
DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
Sub1, Quot);
} else
Quot = DAG.getNode(ISD::UDIV, dl, VT,
LHS, RHS);
return Quot;
}
void TargetLowering::expandUADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::UADDO;
// If ADD/SUBCARRY is legal, use that instead.
unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
{ LHS, RHS, CarryIn });
Result = SDValue(NodeCarry.getNode(), 0);
Overflow = SDValue(NodeCarry.getNode(), 1);
return;
}
Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
LHS.getValueType(), LHS, RHS);
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
void TargetLowering::expandSADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::SADDO;
Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
LHS.getValueType(), LHS, RHS);
EVT ResultType = Node->getValueType(1);
EVT OType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
return;
}
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
// For an addition, the result should be less than one of the operands (LHS)
// if and only if the other operand (RHS) is negative, otherwise there will
// be overflow.
// For a subtraction, the result should be less than one of the operands
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
// otherwise there will be overflow.
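// E.g. for an i8 saddo, 100 + 50 wraps to -106: the result is less than the
// LHS while the RHS is non-negative, so the two setcc results differ and the
// XOR below reports overflow.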
SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
SDValue ConditionRHS =
DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
Overflow = DAG.getBoolExtOrTrunc(
DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
ResultType, ResultType);
}
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
bool isSigned = Node->getOpcode() == ISD::SMULO;
// For power-of-two multiplications we can use a simpler shift expansion.
if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
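// E.g. for an i8 umulo by 4 (S = 2): X = 0x50 gives X << 2 = 0x40 after
// wrapping, and (0x40 >> 2) = 0x10 != 0x50, so overflow is reported.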
if (C.isPowerOf2()) {
// smulo(x, signed_min) is the same as umulo(x, signed_min).
bool UseArithShift = isSigned && !C.isMinSignedValue();
EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT,
DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
dl, VT, Result, ShiftAmt),
LHS, ISD::SETNE);
return true;
}
}
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorNumElements());
SDValue BottomHalf;
SDValue TopHalf;
static const unsigned Ops[2][3] =
{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
} else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
} else if (isTypeLegal(WideVT)) {
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
getShiftAmountTy(WideVT, DAG.getDataLayout()));
TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
} else {
if (VT.isVector())
return false;
// We can fall back to a libcall with an illegal type for the MUL if we
// have a libcall big enough.
// Also, we can fall back to a division in some cases, but that's a big
// performance hit in the general case.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (WideVT == MVT::i16)
LC = RTLIB::MUL_I16;
else if (WideVT == MVT::i32)
LC = RTLIB::MUL_I32;
else if (WideVT == MVT::i64)
LC = RTLIB::MUL_I64;
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
SDValue HiLHS;
SDValue HiRHS;
if (isSigned) {
// The high part is obtained by SRA'ing all but one of the bits of the low
// part.
unsigned LoSize = VT.getFixedSizeInBits();
HiLHS =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(LoSize - 1, dl,
getPointerTy(DAG.getDataLayout())));
HiRHS =
DAG.getNode(ISD::SRA, dl, VT, RHS,
DAG.getConstant(LoSize - 1, dl,
getPointerTy(DAG.getDataLayout())));
} else {
HiLHS = DAG.getConstant(0, dl, VT);
HiRHS = DAG.getConstant(0, dl, VT);
}
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not being a
// legal type for the architecture and thus having to be split into two
// arguments.
SDValue Ret;
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
CallOptions.setIsPostTypeLegalization(true);
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
// Halves of WideVT are packed into registers in different order
// depending on platform endianness. This is usually handled by
// the C calling convention, but we can't defer to it in
// the legalizer.
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
"Ret value is a collection of constituent nodes holding result.");
if (DAG.getDataLayout().isLittleEndian()) {
// Same as above.
BottomHalf = Ret.getOperand(0);
TopHalf = Ret.getOperand(1);
} else {
BottomHalf = Ret.getOperand(1);
TopHalf = Ret.getOperand(0);
}
}
Result = BottomHalf;
if (isSigned) {
SDValue ShiftAmt = DAG.getConstant(
VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
} else {
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
DAG.getConstant(0, dl, VT), ISD::SETNE);
}
// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
if (RType.bitsLT(Overflow.getValueType()))
Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
"Unexpected result type for S/UMULO legalization");
return true;
}
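// A rough worked example of the overflow check above, purely illustrative
// and assuming 8-bit operands: for SMULO with LHS = 100 and RHS = 3 the full
// product is 300 = 0x12C, so BottomHalf is 0x2C and TopHalf is 0x01. The
// sign bits of BottomHalf (SRA by 7) give 0x00, which differs from TopHalf,
// so Overflow is set, as expected since 300 does not fit in a signed i8.
// For 10 * 3 = 30 the TopHalf equals the sign extension of BottomHalf and
// no overflow is reported.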
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();
if (VT.isScalableVector())
report_fatal_error(
"Expanding reductions for scalable vectors is undefined.");
// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
break;
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
VT = HalfVT;
}
}
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
SDValue Res = Ops[0];
for (unsigned i = 1; i < NumElts; i++)
Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
// Result type may be wider than element type.
if (EltVT != Node->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
return Res;
}
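// Illustrative sketch of the shuffle path above, not part of the lowering
// itself: a vecreduce.add over <8 x i32> is first split into two <4 x i32>
// halves that are added, then into two <2 x i32> halves, and whatever
// remains once the halved binary op stops being legal is extracted and
// folded with scalar adds, giving roughly log2(N) vector ops plus a short
// scalar tail.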
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
SDValue VecOp = Node->getOperand(1);
SDNodeFlags Flags = Node->getFlags();
EVT VT = VecOp.getValueType();
EVT EltVT = VT.getVectorElementType();
if (VT.isScalableVector())
report_fatal_error(
"Expanding reductions for scalable vectors is undefined.");
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Res = AccOp;
for (unsigned i = 0; i < NumElts; i++)
Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
return Res;
}
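// Roughly, for an ordered (sequential) FADD reduction over <4 x float> with
// accumulator Acc, the expansion above computes
//   (((Acc + v[0]) + v[1]) + v[2]) + v[3]
// strictly left to right, which preserves the FP evaluation order that the
// shuffle/tree expansion used for the unordered reductions above would not.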
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
SDLoc dl(Node);
bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
SDValue Dividend = Node->getOperand(0);
SDValue Divisor = Node->getOperand(1);
if (isOperationLegalOrCustom(DivRemOpc, VT)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
return true;
}
if (isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
return true;
}
return false;
}
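// Worked example of the X-X/Y*Y fallback above, purely illustrative: for
// X = 17 and Y = 5 the DIV path computes 17/5 = 3, then 3*5 = 15, and
// 17 - 15 = 2, which matches 17 % 5. For SREM the same identity holds with
// truncating signed division, e.g. -17 srem 5 = -17 - (-3)*5 = -2.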
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
SelectionDAG &DAG) const {
bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
SDLoc dl(SDValue(Node, 0));
SDValue Src = Node->getOperand(0);
// DstVT is the result type, while SatVT is the size to which we saturate
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned SatWidth = SatVT.getScalarSizeInBits();
unsigned DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth &&
"Expected saturation width smaller than result width");
// Determine minimum and maximum integer values and their corresponding
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
} else {
MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
}
// We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
// libcall emission cannot handle this. Large result types will fail.
if (SrcVT == MVT::f16) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
SrcVT = Src.getValueType();
}
APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
APFloat::opStatus MinStatus =
MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
APFloat::opStatus MaxStatus =
MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
!(MaxStatus & APFloat::opStatus::opInexact);
SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
isOperationLegal(ISD::FMAXNUM, SrcVT);
if (AreExactFloatBounds && MinMaxLegal) {
SDValue Clamped = Src;
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
dl, DstVT, Clamped);
// In the unsigned case we're done, because we mapped NaN to MinFloat,
// which will cast to zero.
if (!IsSigned)
return FpToInt;
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
ISD::CondCode::SETUO);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
// Result of direct conversion. The assumption here is that the operation is
// non-trapping and it's fine to apply it to an out-of-range value if we
// select it away later.
SDValue FpToInt =
DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
SDValue Select = FpToInt;
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
ISD::CondCode::SETULT);
// If Src OGT MaxFloat, select MaxInt.
Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
ISD::CondCode::SETOGT);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero.
if (!IsSigned)
return Select;
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
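// Rough illustration of the saturating behaviour implemented above, for a
// signed i8 saturation width and a float source: 300.0 clamps to 127,
// -1000.0 clamps to -128, in-range values convert normally, and NaN ends up
// as 0 via the final SETUO select (in the unsigned variant NaN instead maps
// to the minimum, which is already 0).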
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
SelectionDAG &DAG) const {
assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
assert(Node->getValueType(0).isScalableVector() &&
"Fixed length vector types expected to use SHUFFLE_VECTOR!");
EVT VT = Node->getValueType(0);
SDValue V1 = Node->getOperand(0);
SDValue V2 = Node->getOperand(1);
int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
SDLoc DL(Node);
// Expand through memory as follows:
// Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
// Store V1, Ptr
// Store V2, Ptr + sizeof(V1)
// If (Imm < 0)
// TrailingElts = -Imm
// Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
// else
// Ptr = Ptr + (Imm * sizeof(VT.Elt))
// Res = Load Ptr
Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount() * 2);
SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
EVT PtrVT = StackPtr.getValueType();
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
// Store the lo part of CONCAT_VECTORS(V1, V2)
SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
// Store the hi part of CONCAT_VECTORS(V1, V2)
SDValue OffsetToV2 = DAG.getVScale(
DL, PtrVT,
APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
if (Imm >= 0) {
// Load back the required element. getVectorElementPointer takes care of
// clamping the index if it's out-of-bounds.
StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr,
MachinePointerInfo::getUnknownStack(MF));
}
uint64_t TrailingElts = -Imm;
// NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
SDValue TrailingBytes =
DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
if (TrailingElts > VT.getVectorMinNumElements()) {
SDValue VLBytes = DAG.getVScale(
DL, PtrVT,
APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
}
// Calculate the start address of the spliced result.
StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
// Load the spliced result
return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
MachinePointerInfo::getUnknownStack(MF));
}
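// Sketch of the memory-based expansion above for a scalable
// <vscale x 4 x i32> splice with Imm = 1: both inputs are stored back to
// back in a stack slot sized for their concatenation, and the result is a
// single load starting at element 1 of V1, yielding all but the first
// element of V1 followed by the first element of V2. With a negative Imm
// the load instead starts |Imm| elements before the end of V1.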
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default:
llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
InvCC = ISD::getSetCCSwappedOperands(InvCC);
NeedSwap = true;
}
if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
CC = DAG.getCondCode(InvCC);
NeedInvert = true;
if (NeedSwap)
std::swap(LHS, RHS);
return true;
}
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default:
llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETUO:
if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
CC1 = ISD::SETUNE;
CC2 = ISD::SETUNE;
Opc = ISD::OR;
break;
}
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETUE is expanded, SETOEQ or SETUNE must be legal!");
NeedInvert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
"If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ;
CC2 = ISD::SETOEQ;
Opc = ISD::AND;
break;
case ISD::SETONE:
case ISD::SETUEQ:
// If the SETUO or SETO CC isn't legal, we might be able to use
// SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
// of SETOGT/SETOLT to be legal, the other can be emulated by swapping
// the operands.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
if (!TLI.isCondCodeLegal(CC2, OpVT) &&
(TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
CC1 = ISD::SETOGT;
CC2 = ISD::SETOLT;
Opc = ISD::OR;
NeedInvert = ((unsigned)CCCode & 0x8U);
break;
}
LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
case ISD::SETULT:
case ISD::SETULE:
// If we are floating point, assign and break, otherwise fall through.
if (!OpVT.isInteger()) {
// We can use the 4th bit to tell if we are the unordered
// or ordered version of the opcode.
CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
break;
}
// Fallthrough if we are unsigned integer.
LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
case ISD::SETNE:
case ISD::SETEQ:
// If all combinations of inverting the condition and swapping operands
// didn't work then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
SDValue SetCC1, SetCC2;
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unordered operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
return true;
}
}
return false;
}
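// For reference, two of the expansions the code above can produce,
// illustrative only: SETO becomes (LHS SETOEQ LHS) AND (RHS SETOEQ RHS),
// i.e. "neither operand is NaN"; SETUEQ, when SETUO is not legal, becomes
// the inverted OR of (LHS SETOGT RHS) and (LHS SETOLT RHS), since
// "unordered or equal" is the complement of "ordered and not equal".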
diff --git a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
index 97d6f8cd8075..efdbc49cdf47 100644
--- a/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm-project/llvm/lib/Linker/LinkModules.cpp
@@ -1,605 +1,621 @@
//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LLVM module linker.
//
//===----------------------------------------------------------------------===//
#include "LinkDiagnosticInfo.h"
#include "llvm-c/Linker.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/Error.h"
using namespace llvm;
namespace {
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
IRMover &Mover;
std::unique_ptr<Module> SrcM;
SetVector<GlobalValue *> ValuesToLink;
/// For symbol clashes, prefer those from Src.
unsigned Flags;
/// List of global value names that should be internalized.
StringSet<> Internalize;
/// Function that will perform the actual internalization. The reason for a
/// callback is that the linker cannot call internalizeModule without
/// creating a circular dependency between IPO and the linker.
std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
/// Used as the callback for lazy linking.
/// The mover has just hit GV and we have to decide if it, and other members
/// of the same comdat, should be linked. Every member to be linked is passed
/// to Add.
void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
/// Should we have mover and linker error diag info?
bool emitError(const Twine &Message) {
SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
return true;
}
bool getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar);
bool computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc);
std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
bool &LinkFromSrc);
// Keep track of the lazily linked global members of each comdat in source.
DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
/// Given a global in the source module, return the global in the
/// destination module that is being linked to, if any.
GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
if (!DGV)
return nullptr;
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
if (DGV->hasLocalLinkage())
return nullptr;
// Otherwise, we do in fact link to the destination global.
return DGV;
}
/// Drop GV if it is a member of a comdat that we are dropping.
/// This can happen with COFF's largest selection kind.
void dropReplacedComdat(GlobalValue &GV,
const DenseSet<const Comdat *> &ReplacedDstComdats);
bool linkIfNeeded(GlobalValue &GV);
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
std::function<void(Module &, const StringSet<> &)>
InternalizeCallback = {})
: Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
InternalizeCallback(std::move(InternalizeCallback)) {}
bool run();
};
}
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
GlobalValue::VisibilityTypes B) {
if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility)
return GlobalValue::HiddenVisibility;
if (A == GlobalValue::ProtectedVisibility ||
B == GlobalValue::ProtectedVisibility)
return GlobalValue::ProtectedVisibility;
return GlobalValue::DefaultVisibility;
}
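// In other words, visibility is ordered hidden < protected < default and
// the most restrictive of the two inputs survives: protected merged with
// hidden yields hidden, default merged with protected yields protected, and
// only default merged with default stays default.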
bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
GVal = GA->getBaseObject();
if (!GVal)
// We cannot resolve the size of the aliasee yet.
return emitError("Linking COMDATs named '" + ComdatName +
"': COMDAT key involves incomputable alias size.");
}
GVar = dyn_cast_or_null<GlobalVariable>(GVal);
if (!GVar)
return emitError(
"Linking COMDATs named '" + ComdatName +
"': GlobalVariable required for data dependent selection!");
return false;
}
bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
Dst == Comdat::SelectionKind::Largest;
bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
Src == Comdat::SelectionKind::Largest;
if (DstAnyOrLargest && SrcAnyOrLargest) {
if (Dst == Comdat::SelectionKind::Largest ||
Src == Comdat::SelectionKind::Largest)
Result = Comdat::SelectionKind::Largest;
else
Result = Comdat::SelectionKind::Any;
} else if (Src == Dst) {
Result = Dst;
} else {
return emitError("Linking COMDATs named '" + ComdatName +
"': invalid selection kinds!");
}
switch (Result) {
case Comdat::SelectionKind::Any:
// Go with Dst.
LinkFromSrc = false;
break;
- case Comdat::SelectionKind::NoDeduplicate:
- return emitError("Linking COMDATs named '" + ComdatName +
- "': nodeduplicate has been violated!");
+ case Comdat::SelectionKind::NoDeduplicate: {
+ const GlobalVariable *DstGV;
+ const GlobalVariable *SrcGV;
+ if (getComdatLeader(DstM, ComdatName, DstGV) ||
+ getComdatLeader(*SrcM, ComdatName, SrcGV))
+ return true;
+
+ if (SrcGV->isWeakForLinker()) {
+ // Go with Dst.
+ LinkFromSrc = false;
+ } else if (DstGV->isWeakForLinker()) {
+ // Go with Src.
+ LinkFromSrc = true;
+ } else {
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': nodeduplicate has been violated!");
+ }
+ break;
+ }
case Comdat::SelectionKind::ExactMatch:
case Comdat::SelectionKind::Largest:
case Comdat::SelectionKind::SameSize: {
const GlobalVariable *DstGV;
const GlobalVariable *SrcGV;
if (getComdatLeader(DstM, ComdatName, DstGV) ||
getComdatLeader(*SrcM, ComdatName, SrcGV))
return true;
const DataLayout &DstDL = DstM.getDataLayout();
const DataLayout &SrcDL = SrcM->getDataLayout();
uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType());
uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType());
if (Result == Comdat::SelectionKind::ExactMatch) {
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
"': ExactMatch violated!");
LinkFromSrc = false;
} else if (Result == Comdat::SelectionKind::Largest) {
LinkFromSrc = SrcSize > DstSize;
} else if (Result == Comdat::SelectionKind::SameSize) {
if (SrcSize != DstSize)
return emitError("Linking COMDATs named '" + ComdatName +
"': SameSize violated!");
LinkFromSrc = false;
} else {
llvm_unreachable("unknown selection kind");
}
break;
}
}
return false;
}
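// A concrete example of the size-based kinds handled above: when both
// modules define a COMDAT with the Largest selection kind, the leader
// variable of each side is looked up and the copy whose value type has the
// larger alloc size wins (a tie keeps the destination copy); SameSize
// instead requires the two sizes to match exactly and otherwise reports an
// error.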
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
bool &LinkFromSrc) {
Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
if (DstCI == ComdatSymTab.end()) {
// Use the comdat if it is only available in one of the modules.
LinkFromSrc = true;
Result = SSK;
return false;
}
const Comdat *DstC = &DstCI->second;
Comdat::SelectionKind DSK = DstC->getSelectionKind();
return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
LinkFromSrc);
}
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
const GlobalValue &Dest,
const GlobalValue &Src) {
// Should we unconditionally use the Src?
if (shouldOverrideFromSrc()) {
LinkFromSrc = true;
return false;
}
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage() || Dest.hasAppendingLinkage()) {
LinkFromSrc = true;
return false;
}
bool SrcIsDeclaration = Src.isDeclarationForLinker();
bool DestIsDeclaration = Dest.isDeclarationForLinker();
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external, just link the
// external globals; we aren't adding anything.
if (Src.hasDLLImportStorageClass()) {
// If one of the GVs is marked as DLLImport, the result should be dllimport'ed.
LinkFromSrc = DestIsDeclaration;
return false;
}
// If the Dest is weak, use the source linkage.
if (Dest.hasExternalWeakLinkage()) {
LinkFromSrc = true;
return false;
}
// Link an available_externally over a declaration.
LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration();
return false;
}
if (DestIsDeclaration) {
// If Dest is external but Src is not:
LinkFromSrc = true;
return false;
}
if (Src.hasCommonLinkage()) {
if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
if (!Dest.hasCommonLinkage()) {
LinkFromSrc = false;
return false;
}
const DataLayout &DL = Dest.getParent()->getDataLayout();
uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType());
uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType());
LinkFromSrc = SrcSize > DestSize;
return false;
}
if (Src.isWeakForLinker()) {
assert(!Dest.hasExternalWeakLinkage());
assert(!Dest.hasAvailableExternallyLinkage());
if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) {
LinkFromSrc = true;
return false;
}
LinkFromSrc = false;
return false;
}
if (Dest.isWeakForLinker()) {
assert(Src.hasExternalLinkage());
LinkFromSrc = true;
return false;
}
assert(!Src.hasExternalWeakLinkage());
assert(!Dest.hasExternalWeakLinkage());
assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() &&
"Unexpected linkage type!");
return emitError("Linking globals named '" + Src.getName() +
"': symbol multiply defined!");
}
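// A short illustration of the precedence implemented above: a weak
// definition in Src never overrides a strong definition already in Dest;
// when both symbols are common, the larger of the two is kept; a
// declaration in Dest is always replaced by a definition from Src; and two
// plain external definitions of the same name are reported as a multiply
// defined symbol.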
bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
if (shouldLinkOnlyNeeded()) {
// Always import variables with appending linkage.
if (!GV.hasAppendingLinkage()) {
// Don't import globals unless they are referenced by the destination
// module.
if (!DGV)
return false;
// Don't import globals that are already defined in the destination module
if (!DGV->isDeclaration())
return false;
}
}
if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) {
auto *DGVar = dyn_cast<GlobalVariable>(DGV);
auto *SGVar = dyn_cast<GlobalVariable>(&GV);
if (DGVar && SGVar) {
if (DGVar->isDeclaration() && SGVar->isDeclaration() &&
(!DGVar->isConstant() || !SGVar->isConstant())) {
DGVar->setConstant(false);
SGVar->setConstant(false);
}
if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) {
MaybeAlign Align(
std::max(DGVar->getAlignment(), SGVar->getAlignment()));
SGVar->setAlignment(Align);
DGVar->setAlignment(Align);
}
}
GlobalValue::VisibilityTypes Visibility =
getMinVisibility(DGV->getVisibility(), GV.getVisibility());
DGV->setVisibility(Visibility);
GV.setVisibility(Visibility);
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr(
DGV->getUnnamedAddr(), GV.getUnnamedAddr());
DGV->setUnnamedAddr(UnnamedAddr);
GV.setUnnamedAddr(UnnamedAddr);
}
if (!DGV && !shouldOverrideFromSrc() &&
(GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
GV.hasAvailableExternallyLinkage()))
return false;
if (GV.isDeclaration())
return false;
if (const Comdat *SC = GV.getComdat()) {
bool LinkFromSrc;
Comdat::SelectionKind SK;
std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
if (!LinkFromSrc)
return false;
}
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
return true;
if (LinkFromSrc)
ValuesToLink.insert(&GV);
return false;
}
void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
// Add these to the internalize list
if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
!shouldLinkOnlyNeeded())
return;
if (InternalizeCallback)
Internalize.insert(GV.getName());
Add(GV);
const Comdat *SC = GV.getComdat();
if (!SC)
return;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return;
if (!LinkFromSrc)
continue;
if (InternalizeCallback)
Internalize.insert(GV2->getName());
Add(*GV2);
}
}
void ModuleLinker::dropReplacedComdat(
GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) {
Comdat *C = GV.getComdat();
if (!C)
return;
if (!ReplacedDstComdats.count(C))
return;
if (GV.use_empty()) {
GV.eraseFromParent();
return;
}
if (auto *F = dyn_cast<Function>(&GV)) {
F->deleteBody();
} else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
Var->setInitializer(nullptr);
} else {
auto &Alias = cast<GlobalAlias>(GV);
Module &M = *Alias.getParent();
GlobalValue *Declaration;
if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
} else {
Declaration =
new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
GlobalValue::ExternalLinkage,
/*Initializer*/ nullptr);
}
Declaration->takeName(&Alias);
Alias.replaceAllUsesWith(Declaration);
Alias.eraseFromParent();
}
}
bool ModuleLinker::run() {
Module &DstM = Mover.getModule();
DenseSet<const Comdat *> ReplacedDstComdats;
for (const auto &SMEC : SrcM->getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
if (ComdatsChosen.count(&C))
continue;
Comdat::SelectionKind SK;
bool LinkFromSrc;
if (getComdatResult(&C, SK, LinkFromSrc))
return true;
ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
if (!LinkFromSrc)
continue;
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName());
if (DstCI == ComdatSymTab.end())
continue;
// The source comdat is replacing the dest one.
const Comdat *DstC = &DstCI->second;
ReplacedDstComdats.insert(DstC);
}
// Aliases have to go first, since we are not able to find their comdats
// otherwise.
for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
GlobalAlias &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
GlobalVariable &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
Function &GV = *I++;
dropReplacedComdat(GV, ReplacedDstComdats);
}
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
if (const Comdat *SC = GV.getComdat())
LazyComdatMembers[SC].push_back(&GV);
for (Function &SF : *SrcM)
if (SF.hasLinkOnceLinkage())
if (const Comdat *SC = SF.getComdat())
LazyComdatMembers[SC].push_back(&SF);
for (GlobalAlias &GA : SrcM->aliases())
if (GA.hasLinkOnceLinkage())
if (const Comdat *SC = GA.getComdat())
LazyComdatMembers[SC].push_back(&GA);
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (GlobalVariable &GV : SrcM->globals())
if (linkIfNeeded(GV))
return true;
for (Function &SF : *SrcM)
if (linkIfNeeded(SF))
return true;
for (GlobalAlias &GA : SrcM->aliases())
if (linkIfNeeded(GA))
return true;
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
const Comdat *SC = GV->getComdat();
if (!SC)
continue;
for (GlobalValue *GV2 : LazyComdatMembers[SC]) {
GlobalValue *DGV = getLinkedToGlobal(GV2);
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2))
return true;
if (LinkFromSrc)
ValuesToLink.insert(GV2);
}
}
if (InternalizeCallback) {
for (GlobalValue *GV : ValuesToLink)
Internalize.insert(GV->getName());
}
// FIXME: Propagate Errors through to the caller instead of emitting
// diagnostics.
bool HasErrors = false;
if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
[this](GlobalValue &GV, IRMover::ValueAdder Add) {
addLazyFor(GV, Add);
},
/* IsPerformingImport */ false)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
});
}
if (HasErrors)
return true;
if (InternalizeCallback)
InternalizeCallback(DstM, Internalize);
return false;
}
Linker::Linker(Module &M) : Mover(M) {}
bool Linker::linkInModule(
std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
ModuleLinker ModLinker(Mover, std::move(Src), Flags,
std::move(InternalizeCallback));
return ModLinker.run();
}
//===----------------------------------------------------------------------===//
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
/// This function links two modules together, with the resulting Dest module
/// modified to be the composite of the two input modules. If an error occurs,
/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
/// Upon failure, the Dest module could be in a modified state, and shouldn't be
/// relied on to be consistent.
bool Linker::linkModules(
Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
Linker L(Dest);
return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
}
//===----------------------------------------------------------------------===//
// C API.
//===----------------------------------------------------------------------===//
LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
Module *D = unwrap(Dest);
std::unique_ptr<Module> M(unwrap(Src));
return Linker::linkModules(*D, std::move(M));
}
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
index f52dbc604a9f..21c06e2dec26 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp
@@ -1,3229 +1,3232 @@
//===- Parsing, selection, and construction of pass pipelines -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the implementation of the PassBuilder based on our
/// static pass registry as well as related functionality. It also provides
/// helpers to aid in analyzing, debugging, and testing passes and pass
/// pipelines.
///
//===----------------------------------------------------------------------===//
#include "llvm/Passes/PassBuilder.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/AliasAnalysisEvaluator.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DDGPrinter.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemDerefPrinter.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ModuleDebugInfoPrinter.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PhiValues.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "llvm/Transforms/Coroutines/CoroEarly.h"
#include "llvm/Transforms/Coroutines/CoroElide.h"
#include "llvm/Transforms/Coroutines/CoroSplit.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/Annotation2Metadata.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/Transforms/IPO/HotColdSplitting.h"
#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Scalar/BDCE.h"
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
#include "llvm/Transforms/Scalar/ConstraintElimination.h"
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/Float2Int.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopFlatten.h"
#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopInterchange.h"
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/HelloWorld.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/InstructionNamer.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/StripGCRelocates.h"
#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
"enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
"Heuristics-based inliner version."),
clEnumValN(InliningAdvisorMode::Development, "development",
"Use development mode (runtime-loadable model)."),
clEnumValN(InliningAdvisorMode::Release, "release",
"Use release mode (AOT-compiled model).")));
static cl::opt<bool> EnableSyntheticCounts(
"enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
cl::desc("Run synthetic function entry count generation "
"pass"));
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
cl::Hidden,
cl::desc("Enable inline deferral during PGO"));
static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
cl::desc("Enable memory profiler"));
static cl::opt<bool> PerformMandatoryInliningsFirst(
"mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining."));
static cl::opt<bool> EnableO3NonTrivialUnswitching(
"enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
SLPVectorization = false;
LoopUnrolling = true;
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = true;
MergeFunctions = false;
}
namespace llvm {
extern cl::opt<unsigned> MaxDevirtIterations;
extern cl::opt<bool> EnableConstraintElimination;
extern cl::opt<bool> EnableFunctionSpecialization;
extern cl::opt<bool> EnableGVNHoist;
extern cl::opt<bool> EnableGVNSink;
extern cl::opt<bool> EnableHotColdSplit;
extern cl::opt<bool> EnableIROutliner;
extern cl::opt<bool> EnableOrderFileInstrumentation;
extern cl::opt<bool> EnableCHR;
extern cl::opt<bool> EnableLoopInterchange;
extern cl::opt<bool> EnableUnrollAndJam;
extern cl::opt<bool> EnableLoopFlatten;
extern cl::opt<bool> EnableDFAJumpThreading;
extern cl::opt<bool> RunNewGVN;
extern cl::opt<bool> RunPartialInlining;
extern cl::opt<bool> ExtraVectorizerPasses;
extern cl::opt<bool> FlattenedProfileUsed;
extern cl::opt<AttributorRunOption> AttributorRun;
extern cl::opt<bool> EnableKnowledgeRetention;
extern cl::opt<bool> EnableMatrix;
extern cl::opt<bool> DisablePreInliner;
extern cl::opt<int> PreInlineThreshold;
} // namespace llvm
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
/*SpeedLevel*/ 0,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
/*SpeedLevel*/ 1,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
/*SpeedLevel*/ 3,
/*SizeLevel*/ 0};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 1};
const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
/*SpeedLevel*/ 2,
/*SizeLevel*/ 2};
namespace {
// The following passes/analyses have custom names, otherwise their name will
// include `(anonymous namespace)`. These are special since they are only for
// testing purposes and don't live in a header file.
/// No-op module pass which does nothing.
struct NoOpModulePass : PassInfoMixin<NoOpModulePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpModulePass"; }
};
/// No-op module analysis.
class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
friend AnalysisInfoMixin<NoOpModuleAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Module &, ModuleAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpModuleAnalysis"; }
};
/// No-op CGSCC pass which does nothing.
struct NoOpCGSCCPass : PassInfoMixin<NoOpCGSCCPass> {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
LazyCallGraph &, CGSCCUpdateResult &UR) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpCGSCCPass"; }
};
/// No-op CGSCC analysis.
class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
return Result();
}
static StringRef name() { return "NoOpCGSCCAnalysis"; }
};
/// No-op function pass which does nothing.
struct NoOpFunctionPass : PassInfoMixin<NoOpFunctionPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpFunctionPass"; }
};
/// No-op function analysis.
class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Function &, FunctionAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
/// No-op loop pass which does nothing.
struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
LoopStandardAnalysisResults &, LPMUpdater &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpLoopPass"; }
};
/// No-op loop analysis.
class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
friend AnalysisInfoMixin<NoOpLoopAnalysis>;
static AnalysisKey Key;
public:
struct Result {};
Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) {
return Result();
}
static StringRef name() { return "NoOpLoopAnalysis"; }
};
AnalysisKey NoOpModuleAnalysis::Key;
AnalysisKey NoOpCGSCCAnalysis::Key;
AnalysisKey NoOpFunctionAnalysis::Key;
AnalysisKey NoOpLoopAnalysis::Key;
/// Whether or not we should populate a PassInstrumentationCallbacks'
/// class-to-pass-name map.
///
/// This is for optimization purposes so we don't populate it if we never use
/// it. This should be updated if new pass instrumentation wants to use the map.
/// We currently only use this for --print-before/after.
bool shouldPopulateClassToPassNames() {
return !printBeforePasses().empty() || !printAfterPasses().empty();
}
} // namespace
PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
Optional<PGOOptions> PGOOpt,
PassInstrumentationCallbacks *PIC)
: TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {
if (TM)
TM->registerPassBuilderCallbacks(*this);
if (PIC && shouldPopulateClassToPassNames()) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
PIC->addClassToPassName(CLASS, NAME);
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
PIC->addClassToPassName(CLASS, NAME);
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#include "PassRegistry.def"
}
}
void PassBuilder::invokePeepholeEPCallbacks(
FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
for (auto &C : PeepholeEPCallbacks)
C(FPM, Level);
}
void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
MAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : ModuleAnalysisRegistrationCallbacks)
C(MAM);
}
void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
CGAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : CGSCCAnalysisRegistrationCallbacks)
C(CGAM);
}
void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
FAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : FunctionAnalysisRegistrationCallbacks)
C(FAM);
}
void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
LAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
for (auto &C : LoopAnalysisRegistrationCallbacks)
C(LAM);
}
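// The registration helpers above rely on the usual X-macro pattern:
// PassRegistry.def supplies the NAME/CREATE_PASS pairs and each *_ANALYSIS
// macro expands into a registerPass call. As a representative example (the
// exact entries live in PassRegistry.def), an entry such as
//   FUNCTION_ANALYSIS("loops", LoopAnalysis())
// would expand inside registerFunctionAnalyses to
//   FAM.registerPass([&] { return LoopAnalysis(); });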
// Helper to add AnnotationRemarksPass.
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
FunctionPassManager FPM;
FPM.addPass(AnnotationRemarksPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
// Helper to check if the current compilation phase is preparing for LTO
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
Phase == ThinOrFullLTOPhase::FullLTOPreLink;
}
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
FunctionPassManager
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
FunctionPassManager FPM;
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROA());
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
// minimal multiplication trees.
FPM.addPass(ReassociatePass());
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
// some function passes in between them. These can and should be removed
// and/or replaced by scheduling the loop pass equivalents in the correct
// positions. But those equivalent passes aren't powerful enough yet.
// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
// used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
// fully replace `SimplifyCFGPass`, and the closest to the other we have is
// `LoopInstSimplify`.
LoopPassManager LPM1, LPM2;
// Simplify the loop body. We do this initially to clean up after other loop
// passes run, either when iterating on a loop or on inner loops with
// implications on the outer loop.
LPM1.addPass(LoopInstSimplifyPass());
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
// to reduce the amount of IR that will have to be duplicated.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
if (EnableLoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
// because it changes the IR in ways that make profile annotation in the
// backend compile inaccurate. The normal unroller doesn't pay attention to
// forced full unroll attributes, so we need to make sure the full unroll
// pass is still allowed to pay attention to them.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
if (EnableLoopFlatten)
FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
// The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROA());
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
FPM.addPass(MemCpyOptPass());
// Sparse conditional constant propagation.
// FIXME: It isn't clear why we do this *after* loop passes rather than
// before...
FPM.addPass(SCCPPass());
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
FPM.addPass(BDCEPass());
// Run instcombine after redundancy and dead bit elimination to exploit
// opportunities opened up by them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(CoroElidePass());
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
// TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
return FPM;
}
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
// The O1 pipeline has a separate pipeline creation function to simplify
// construction readability.
if (Level.getSpeedupLevel() == 1)
return buildO1FunctionSimplificationPipeline(Level, Phase);
FunctionPassManager FPM;
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROA());
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
if (EnableKnowledgeRetention)
FPM.addPass(AssumeSimplifyPass());
// Hoisting of scalars and load expressions.
if (EnableGVNHoist)
FPM.addPass(GVNHoistPass());
// Global value numbering based sinking.
if (EnableGVNSink) {
FPM.addPass(GVNSinkPass());
FPM.addPass(SimplifyCFGPass());
}
if (EnableConstraintElimination)
FPM.addPass(ConstraintEliminationPass());
// Speculative execution if the target has divergent branches; otherwise nop.
FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
// Optimize based on known information about branches, and cleanup afterward.
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(SimplifyCFGPass());
if (Level == OptimizationLevel::O3)
FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(InstCombinePass());
if (!Level.isOptimizingForSize())
FPM.addPass(LibCallsShrinkWrapPass());
invokePeepholeEPCallbacks(FPM, Level);
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
!Level.isOptimizingForSize())
FPM.addPass(PGOMemOPSizeOpt());
FPM.addPass(TailCallElimPass());
FPM.addPass(SimplifyCFGPass());
// Form canonically associated expression trees, and simplify the trees using
// basic mathematical properties. For example, this will form (nearly)
// minimal multiplication trees.
FPM.addPass(ReassociatePass());
// Add the primary loop simplification pipeline.
// FIXME: Currently this is split into two loop pass pipelines because we run
// some function passes in between them. These can and should be removed
// and/or replaced by scheduling the loop pass equivalents in the correct
// positions. But those equivalent passes aren't powerful enough yet.
// Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
// used. We have `LoopSimplifyCFGPass`, which isn't yet powerful enough to
// fully replace `SimplifyCFGPass`, and the closest to the other we have is
// `LoopInstSimplify`.
LoopPassManager LPM1, LPM2;
// Simplify the loop body. We do this initially to clean up after other loop
// passes run, either when iterating on a loop or on inner loops with
// implications on the outer loop.
LPM1.addPass(LoopInstSimplifyPass());
LPM1.addPass(LoopSimplifyCFGPass());
// Try to remove as much code from the loop header as possible,
// to reduce the amount of IR that will have to be duplicated.
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
// Disable header duplication in loop rotation at -Oz.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(
SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
EnableO3NonTrivialUnswitching));
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
if (EnableLoopInterchange)
LPM2.addPass(LoopInterchangePass());
// Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
// because it changes the IR in a way that makes profile annotation in the
// backend compile inaccurate. The normal unroller doesn't pay attention to
// forced full-unroll attributes, so we need to make sure the full unroll pass
// is allowed to pay attention to them.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse)
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
if (EnableLoopFlatten)
FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
// The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
// LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false));
// Delete small array after loop unroll.
FPM.addPass(SROA());
// Eliminate redundancies.
FPM.addPass(MergedLoadStoreMotionPass());
if (RunNewGVN)
FPM.addPass(NewGVNPass());
else
FPM.addPass(GVN());
// Sparse conditional constant propagation.
// FIXME: It isn't clear why we do this *after* loop passes rather than
// before...
FPM.addPass(SCCPPass());
// Delete dead bit computations (instcombine runs after to fold away the dead
// computations, and then ADCE will run later to exploit any new DCE
// opportunities that creates).
FPM.addPass(BDCEPass());
// Run instcombine after redundancy and dead bit elimination to exploit
// opportunities opened up by them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
FPM.addPass(DFAJumpThreadingPass());
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
// TODO: Investigate if this is too expensive.
FPM.addPass(ADCEPass());
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
FPM.addPass(MemCpyOptPass());
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
FPM.addPass(CoroElidePass());
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
FPM.addPass(SimplifyCFGPass(
SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
(PGOOpt->Action == PGOOptions::IRUse ||
PGOOpt->Action == PGOOptions::SampleUse))
FPM.addPass(ControlHeightReductionPass());
return FPM;
}
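// Illustrative sketch (not part of the original file): with analysis managers
// set up as in the earlier sketch, this simplification pipeline can be built
// and run on a single function by a client; F below is a placeholder
// llvm::Function:
//
//   FunctionPassManager SimplifyFPM = PB.buildFunctionSimplificationPipeline(
//       PassBuilder::OptimizationLevel::O2, ThinOrFullLTOPhase::None);
//   PreservedAnalyses PA = SimplifyFPM.run(F, FAM);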
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
}
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
PassBuilder::OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!IsCS && !DisablePreInliner) {
InlineParams IP;
IP.DefaultThreshold = PreInlineThreshold;
// FIXME: The hint threshold has the same value used by the regular inliner
// when not optimizing for size. This should probably be lowered after
// performance testing.
// FIXME: This comment is cargo-culted from the old pass manager; revisit.
IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
ModuleInlinerWrapperPass MIWP(IP);
CGSCCPassManager &CGPipeline = MIWP.getPM();
FunctionPassManager FPM;
FPM.addPass(SROA());
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
FPM.addPass(InstCombinePass()); // Combine silly sequences.
invokePeepholeEPCallbacks(FPM, Level);
CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(std::move(MIWP));
// Delete anything that is now dead to make sure that we don't instrument
// dead code. Instrumentation can end up keeping dead code around and
// dramatically increase code size.
MPM.addPass(GlobalDCEPass());
}
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
return;
}
// Perform PGO instrumentation.
MPM.addPass(PGOInstrumentationGen(IsCS));
FunctionPassManager FPM;
// Disable header duplication in loop rotation at -Oz.
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/false));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!ProfileFile.empty())
Options.InstrProfileOutput = ProfileFile;
// Do counter promotion at Level greater than O0.
Options.DoCounterPromotion = true;
Options.UseBFIInPromotion = IsCS;
MPM.addPass(InstrProfiling(Options, IsCS));
}
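// Illustrative note (not part of the original file): the same instrumentation
// steps can also be requested individually through the textual pipeline,
// assuming the registry names "pgo-instr-gen" and "instrprof" from
// PassRegistry.def:
//
//   ModulePassManager PGOMPM;
//   if (Error Err = PB.parsePassPipeline(PGOMPM, "pgo-instr-gen,instrprof"))
//     errs() << toString(std::move(Err)) << "\n";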
void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
bool RunProfileGen, bool IsCS,
std::string ProfileFile,
std::string ProfileRemappingFile) {
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
return;
}
// Perform PGO instrumentation.
MPM.addPass(PGOInstrumentationGen(IsCS));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!ProfileFile.empty())
Options.InstrProfileOutput = ProfileFile;
// Do not do counter promotion at O0.
Options.DoCounterPromotion = false;
Options.UseBFIInPromotion = IsCS;
MPM.addPass(InstrProfiling(Options, IsCS));
}
static InlineParams
getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
InlineParams IP = getInlineParamsFromOptLevel(Level);
if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
if (PGOOpt)
IP.EnableDeferral = EnablePGOInlineDeferral;
ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
UseInlineAdvisor, MaxDevirtIterations);
// Require the GlobalsAA analysis for the module so we can query it within
// the CGSCC pipeline.
MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
// Invalidate AAManager so it can be recreated and pick up the newly available
// GlobalsAA.
MIWP.addModulePass(
createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
// Require the ProfileSummaryAnalysis for the module so we can query it within
// the inliner pass.
MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Now begin the main postorder CGSCC pipeline.
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
// manager and trying to emulate its precise behavior. Much of this doesn't
// make a lot of sense and we should revisit the core CGSCC structure.
CGSCCPassManager &MainCGPipeline = MIWP.getPM();
// Note: historically, the PruneEH pass was run first to deduce nounwind and
// generally clean up exception handling overhead. It isn't clear this is
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
if (AttributorRun & AttributorRunOption::CGSCC)
MainCGPipeline.addPass(AttributorCGSCCPass());
// Now deduce any function attributes based on the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
// When at O3 add argument promotion to the pass pipeline.
// FIXME: It isn't at all clear why this should be limited to O3.
if (Level == OptimizationLevel::O3)
MainCGPipeline.addPass(ArgumentPromotionPass());
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
MainCGPipeline.addPass(OpenMPOptCGSCCPass());
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(MainCGPipeline, Level);
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase)));
MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
return MIWP;
}
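// Illustrative sketch (not part of the original file): a client that wants a
// standalone inliner with tweaked thresholds can construct the wrapper pass
// directly; the threshold values below are placeholders, not recommendations:
//
//   InlineParams IP = getInlineParams(/*OptLevel=*/3, /*SizeOptLevel=*/0);
//   IP.HintThreshold = 325;  // placeholder tuning value
//   MPM.addPass(ModuleInlinerWrapperPass(IP));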
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
ModulePassManager MPM;
// Place pseudo probe instrumentation as the first pass of the pipeline to
// minimize the impact of optimization changes.
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(SampleProfileProbePass(TM));
bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
// In ThinLTO mode, when a flattened profile is used, all the available
// profile information will be annotated in the PreLink phase, so there is
// no need to load the profile again in PostLink.
bool LoadSampleProfile =
HasSampleProfile &&
!(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
// During the ThinLTO backend phase we perform early indirect call promotion
// here, before globalopt. Otherwise imported available_externally functions
// look unreferenced and are removed. If we are going to load the sample
// profile then defer until later.
// TODO: See if we can move later and consolidate with the location where
// we perform ICP when we are loading a sample profile.
// TODO: We pass HasSampleProfile (whether there was a sample profile file
// passed to the compile) to the SamplePGO flag of ICP. This is used to
// determine whether the new direct calls are annotated with prof metadata.
// Ideally this should be determined from whether the IR is annotated with
// sample profile, and not whether a sample profile was provided on the
// command line. E.g. for flattened profiles where we will not be reloading
// the sample profile in the ThinLTO backend, we ideally shouldn't have to
// provide the sample profile file.
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
// Create an early function pass manager to cleanup the output of the
// frontend.
FunctionPassManager EarlyFPM;
// Lower llvm.expect to metadata before attempting transforms.
// Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
EarlyFPM.addPass(LowerExpectIntrinsicPass());
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(CoroEarlyPass());
if (Level == OptimizationLevel::O3)
EarlyFPM.addPass(CallSiteSplittingPass());
// In the SamplePGO ThinLTO backend, we need instcombine before profile
// annotation to convert bitcasts to direct calls so that they can be inlined
// during the profile annotation preparation step.
// More details about SamplePGO design can be found in:
// https://research.google.com/pubs/pub45290.html
// FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
if (LoadSampleProfile)
EarlyFPM.addPass(InstCombinePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
if (LoadSampleProfile) {
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile, Phase));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the LTOPreLink phase as it makes it hard
// for the profile annotation to be accurate in the LTO backend.
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
Phase != ThinOrFullLTOPhase::FullLTOPreLink)
// We perform early indirect call promotion here, before globalopt.
// This is important for the ThinLTO backend phase because otherwise
// imported available_externally functions look unreferenced and are
// removed.
MPM.addPass(
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
}
// Try to perform OpenMP specific optimizations on the module. This is a
// (quick!) no-op if there are no OpenMP runtime calls present in the module.
if (Level != OptimizationLevel::O0)
MPM.addPass(OpenMPOptPass());
if (AttributorRun & AttributorRunOption::MODULE)
MPM.addPass(AttributorPass());
// Lower type metadata and the type.test intrinsic in the ThinLTO
// post link pipeline after ICP. This is to enable usage of the type
// tests in ICP sequences.
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
for (auto &C : PipelineEarlySimplificationEPCallbacks)
C(MPM, Level);
// Specialize functions with IPSCCP.
if (EnableFunctionSpecialization)
MPM.addPass(FunctionSpecializationPass());
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
// years, it should be re-analyzed.
MPM.addPass(IPSCCPPass());
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
MPM.addPass(CalledValuePropagationPass());
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
// Promote any localized globals to SSA registers.
// FIXME: Should this instead be a run of SROA?
// FIXME: We should probably run instcombine and simplifycfg afterward to
// delete control flows that are dead once globals have been folded to
// constants.
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
// Remove any dead arguments exposed by cleanups and constant folding
// globals.
MPM.addPass(DeadArgumentEliminationPass());
// Create a small function pass pipeline to cleanup after all the global
// optimizations.
FunctionPassManager GlobalCleanupPM;
GlobalCleanupPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
GlobalCleanupPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse)) {
addPGOInstrPasses(MPM, Level,
/* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
/* IsCS */ false, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
PGOOpt->CSAction == PGOOptions::CSIRInstr)
MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
// Synthesize function entry counts for non-PGO compilation.
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
MPM.addPass(buildInlinerPipeline(Level, Phase));
if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
MPM.addPass(ModuleMemProfilerPass());
}
return MPM;
}
/// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level,
FunctionPassManager &FPM, bool IsFullLTO) {
FPM.addPass(LoopVectorizePass(
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
if (IsFullLTO) {
// The vectorizer may have significantly shortened a loop body; unroll
// again. Unroll small loops to hide loop backedge latency and saturate any
// parallel execution resources of an out-of-order processor. We also then
// need to clean up redundancies and loop invariant code.
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling)
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll)));
FPM.addPass(WarnMissedTransformationsPass());
}
if (!IsFullLTO) {
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
FPM.addPass(LoopLoadEliminationPass());
}
// Cleanup after the loop optimization passes.
FPM.addPass(InstCombinePass());
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
// At higher optimization levels, try to clean up any runtime overlap and
// alignment checks inserted by the vectorizer. We want to track correlated
// runtime checks for two inner loops in the same outer loop, fold any
// common computations, hoist loop-invariant aspects out of any outer loop,
// and unswitch the runtime checks if possible. Once hoisted, we may have
// dead (or speculatable) control flows or more combining opportunities.
FPM.addPass(EarlyCSEPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(InstCombinePass());
LoopPassManager LPM;
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/true));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
}
// Now that we've formed fast-to-execute loop structures, we do further
// optimizations. These are run afterward, as they might block complex
// analyses and transforms such as those needed for loop vectorization.
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
// GVN, loop transforms, and others have already run, so it's now better to
// convert to more optimized IR using more aggressive simplify CFG options.
// The extra sinking transform can create larger basic blocks, so do this
// before SLP vectorization.
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
.convertSwitchToLookupTable(true)
.needCanonicalLoops(false)
.hoistCommonInsts(true)
.sinkCommonInsts(true)));
if (IsFullLTO) {
FPM.addPass(SCCPPass());
FPM.addPass(InstCombinePass());
FPM.addPass(BDCEPass());
}
// Optimize parallel scalar instruction chains into SIMD instructions.
if (PTO.SLPVectorization) {
FPM.addPass(SLPVectorizerPass());
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
FPM.addPass(EarlyCSEPass());
}
}
// Enhance/cleanup vector code.
FPM.addPass(VectorCombinePass());
if (!IsFullLTO) {
FPM.addPass(InstCombinePass());
// Unroll small loops to hide loop backedge latency and saturate any
// parallel execution resources of an out-of-order processor. We also then
// need to clean up redundancies and loop invariant code.
// FIXME: It would be really good to use a loop-integrated instruction
// combiner for cleanup here so that the unrolling and LICM can be pipelined
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
FPM.addPass(createFunctionToLoopPassAdaptor(
LoopUnrollAndJamPass(Level.getSpeedupLevel())));
}
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll)));
FPM.addPass(WarnMissedTransformationsPass());
FPM.addPass(InstCombinePass());
FPM.addPass(
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
}
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information; try to re-derive it here.
FPM.addPass(AlignmentFromAssumptionsPass());
if (IsFullLTO)
FPM.addPass(InstCombinePass());
}
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
bool LTOPreLink) {
ModulePassManager MPM;
// Optimize globals now that the module is fully simplified.
MPM.addPass(GlobalOptPass());
MPM.addPass(GlobalDCEPass());
// Run partial inlining pass to partially inline functions that have
// large bodies.
if (RunPartialInlining)
MPM.addPass(PartialInlinerPass());
// Remove avail extern fns and global definitions since we aren't compiling
// an object file for later LTO. For LTO we want to preserve these so they
// are eligible for inlining at link-time. Note if they are unreferenced they
// will be removed by GlobalDCE later, so this only impacts referenced
// available externally globals. Eventually they will be suppressed during
// codegen, but eliminating here enables more opportunity for GlobalDCE as it
// may make globals referenced by available external functions dead and saves
// running remaining passes on the eliminated functions. These should be
// preserved during prelinking for link-time inlining decisions.
if (!LTOPreLink)
MPM.addPass(EliminateAvailableExternallyPass());
if (EnableOrderFileInstrumentation)
MPM.addPass(InstrOrderFilePass());
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
// Do a post-inline PGO instrumentation and use pass. This is a
// context-sensitive PGO pass. We don't want to do this in the LTOPreLink
// phase, as cross-module inlining has not been done yet. The
// context-sensitive instrumentation is run after all the inlining is done.
if (!LTOPreLink && PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
// Re-require GlobalsAA here prior to function passes. This is particularly
// useful as the above will have inlined, DCE'ed, and function-attr
// propagated everything. We should at this point have a reasonably minimal
// and richly annotated call graph. By computing aliasing and mod/ref
// information for all local globals here, the late loop passes and notably
// the vectorizer will be able to use them to help recognize vectorizable
// memory operations.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
FunctionPassManager OptimizePM;
OptimizePM.addPass(Float2IntPass());
OptimizePM.addPass(LowerConstantIntrinsicsPass());
if (EnableMatrix) {
OptimizePM.addPass(LowerMatrixIntrinsicsPass());
OptimizePM.addPass(EarlyCSEPass());
}
// FIXME: We need to run some loop optimizations to re-rotate loops after
// simplifycfg and others undo their rotation.
// Optimize the loop execution. These passes operate on entire loop nests
// rather than on each loop in an inside-out manner, and so they are actually
// function passes.
for (auto &C : VectorizerStartEPCallbacks)
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz.
OptimizePM.addPass(createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
EnableMSSALoopDependency,
/*UseBlockFrequencyInfo=*/false));
// Distribute loops to allow partial vectorization, i.e. isolate dependences
// into a separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
OptimizePM.addPass(LoopDistributePass());
// Populates the VFABI attribute with the scalar-to-vector mappings
// from the TargetLibraryInfo.
OptimizePM.addPass(InjectTLIMappings());
addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
// is that this has a higher code size cost than splitting early.
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
// Search the code for similar regions of code. If enough similar regions can
// be found where extracting the regions into their own function will decrease
// the size of the program, we extract the regions and deduplicate the
// structurally similar regions.
if (EnableIROutliner)
MPM.addPass(IROutlinerPass());
// Merge functions if requested.
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
// result too early.
OptimizePM.addPass(LoopSinkPass());
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifyPass());
// This hoists/decomposes div/rem ops. It should run after other sink/hoist
// passes to avoid re-sinking, but before SimplifyCFG because it can allow
// flattening of blocks.
OptimizePM.addPass(DivRemPairsPass());
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(SimplifyCFGPass());
OptimizePM.addPass(CoroCleanupPass());
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (PTO.CallGraphProfile)
MPM.addPass(CGProfilePass());
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
// ordering here.
MPM.addPass(GlobalDCEPass());
MPM.addPass(ConstantMergePass());
// TODO: The relative lookup table converter pass caused an issue when full
// LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
// Until the issue is fixed, disable this pass during the pre-linking phase.
if (!LTOPreLink)
MPM.addPass(RelLookupTableConverterPass());
return MPM;
}
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool LTOPreLink) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(
Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
: ThinOrFullLTOPhase::None));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
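// Illustrative sketch (not part of the original file): for non-O0 levels this
// is the pipeline behind the textual alias "default<O2>" (and friends), so a
// client can request it either programmatically or via a pipeline string;
// M here is a placeholder llvm::Module:
//
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(PassBuilder::OptimizationLevel::O2);
//   MPM.run(M, MAM);
//
//   ModulePassManager MPM2;
//   if (Error Err = PB.parsePassPipeline(MPM2, "default<O2>"))
//     errs() << toString(std::move(Err)) << "\n";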
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Apply module pipeline start EP callback.
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPreLink));
// Run partial inlining pass to partially inline functions that have
// large bodies.
// FIXME: It isn't clear whether this is really the right place to run this
// in ThinLTO. Because there is another canonicalization and simplification
// phase that will run after the thin link, running this here ends up with
// less information than will be available later and it may grow functions in
// ways that aren't beneficial.
if (RunPartialInlining)
MPM.addPass(PartialInlinerPass());
// Reduce the size of the IR as much as possible.
MPM.addPass(GlobalOptPass());
// Module simplification splits coroutines, but does not fully clean up
// coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
// on these, we schedule the cleanup here.
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Handle OptimizerLastEPCallbacks added by clang on PreLink. The actual
// optimization is going to be done in the PostLink stage, but clang can't
// add callbacks there in the case of in-process ThinLTO called by the linker.
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
MPM.addPass(PseudoProbeUpdatePass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
if (ImportSummary) {
// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
// disturb the specific instruction patterns that these passes look for,
// creating dependencies on resolutions that may not appear in the summary.
//
// For example, GVN may transform the pattern assume(type.test) appearing in
// two basic blocks into assume(phi(type.test, type.test)), which would
// transform a dependency on a WPD resolution into a dependency on a type
// identifier resolution for CFI.
//
// Also, WPD has access to more precise information than ICP and can
// devirtualize more effectively, so it should operate on the IR first.
//
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
}
if (Level == OptimizationLevel::O0) {
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Drop available_externally and unreferenced globals. This is necessary
// with ThinLTO in order to avoid leaving undefined references to dead
// globals in the object file.
MPM.addPass(EliminateAvailableExternallyPass());
MPM.addPass(GlobalDCEPass());
return MPM;
}
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
assert(Level != OptimizationLevel::O0 &&
"Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
return buildPerModuleDefaultPipeline(Level,
/* LTOPreLink */ true);
}
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
ModuleSummaryIndex *ExportSummary) {
ModulePassManager MPM;
// Convert @llvm.global.annotations to !annotation metadata.
MPM.addPass(Annotation2MetadataPass());
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
MPM.addPass(CrossDSOCFIPass());
if (Level == OptimizationLevel::O0) {
// The WPD and LowerTypeTest passes need to run at -O0 to lower type
// metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
// Load sample profile before running the LTO optimization pipeline.
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
ThinOrFullLTOPhase::FullLTOPostLink));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
if (Level.getSpeedupLevel() > 1) {
FunctionPassManager EarlyFPM;
EarlyFPM.addPass(CallSiteSplittingPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
// Indirect call promotion. This should promote all the targets that are
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save compile time. For LTO, it should
// produce the same result as if we only do promotion here.
MPM.addPass(PGOIndirectCallPromotion(
true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
if (EnableFunctionSpecialization)
MPM.addPass(FunctionSpecializationPass());
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
MPM.addPass(IPSCCPPass());
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
MPM.addPass(CalledValuePropagationPass());
}
// Now deduce any function attributes based on the current code.
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
PostOrderFunctionAttrsPass()));
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
// Use in-range annotations on GEP indices to split globals where beneficial.
MPM.addPass(GlobalSplitPass());
// Run whole-program optimization of virtual calls when the list of callees
// is fixed.
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
// Stop here at -O1.
if (Level == OptimizationLevel::O1) {
// The LowerTypeTestsPass needs to run to lower type metadata and the
// type.test intrinsics. The pass does nothing if CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP (which is performed earlier than this in the regular LTO
// pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
// Promote any localized globals to SSA registers.
MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
// Linking modules together can lead to duplicate global constants; only
// keep one copy of each constant.
MPM.addPass(ConstantMergePass());
// Remove unused arguments from functions.
MPM.addPass(DeadArgumentEliminationPass());
// Reduce the code after globalopt and ipsccp. Both can open up significant
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM;
if (Level == OptimizationLevel::O3)
PeepholeFPM.addPass(AggressiveInstCombinePass());
PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
// Note: historically, the PruneEH pass was run first to deduce nounwind and
// generally clean up exception handling overhead. It isn't clear this is
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
// Garbage collect dead functions.
- // FIXME: Add ArgumentPromotion pass after once it's ported.
MPM.addPass(GlobalDCEPass());
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+
FunctionPassManager FPM;
// The IPO Passes may leave cruft around. Clean up after them.
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
// Do a post inline PGO instrumentation and use pass. This is a context
// sensitive PGO pass.
if (PGOOpt) {
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
}
// Break up allocas
FPM.addPass(SROA());
// LTO provides additional opportunities for tailcall elimination due to
// link-time inlining and visibility of the nocapture attribute.
FPM.addPass(TailCallElimPass());
// Run a few AA-driven optimizations here and now to clean up the code.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
MPM.addPass(
createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
// Require the GlobalsAA analysis for the module so we can query it within
// MainFPM.
MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
// Invalidate AAManager so it can be recreated and pick up the newly available
// GlobalsAA.
MPM.addPass(
createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
FunctionPassManager MainFPM;
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
else
MainFPM.addPass(GVN());
// Remove dead memcpy()'s.
MainFPM.addPass(MemCpyOptPass());
// Nuke dead stores.
MainFPM.addPass(DSEPass());
MainFPM.addPass(MergedLoadStoreMotionPass());
// More loops are countable; try to optimize them.
if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
if (EnableConstraintElimination)
MainFPM.addPass(ConstraintEliminationPass());
LoopPassManager LPM;
LPM.addPass(IndVarSimplifyPass());
LPM.addPass(LoopDeletionPass());
// FIXME: Add loop interchange.
// Unroll small loops and perform peeling.
LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
// The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
// *All* loop passes must preserve it, in order to be able to use it.
MainFPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
MainFPM.addPass(LoopDistributePass());
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
// Lower type metadata and the type.test intrinsic. This pass supports
// clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
// to be run at link time if CFI is enabled. This pass does nothing if
// CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
// Run a second time to clean up any type tests left behind by WPD for use
// in ICP (which is performed earlier than this in the regular LTO pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
// Enable splitting late in the FullLTO post-link pipeline. This is done in
// the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
if (EnableHotColdSplit)
MPM.addPass(HotColdSplittingPass());
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
MPM.addPass(createModuleToFunctionPassAdaptor(
SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
// Drop bodies of available externally objects to improve GlobalDCE.
MPM.addPass(EliminateAvailableExternallyPass());
// Now that we have optimized the program, discard unreachable functions.
MPM.addPass(GlobalDCEPass());
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
return MPM;
}
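// Illustrative sketch (not part of the original file): a linker plugin or LTO
// driver would typically invoke this pipeline on the merged module; the names
// CombinedModule and ExportSummary below are placeholders:
//
//   ModulePassManager LTOMPM = PB.buildLTODefaultPipeline(
//       PassBuilder::OptimizationLevel::O2, ExportSummary);
//   LTOMPM.run(CombinedModule, MAM);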
ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
bool LTOPreLink) {
assert(Level == OptimizationLevel::O0 &&
"buildO0DefaultPipeline should only be used with O0");
ModulePassManager MPM;
if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
PGOOpt->Action == PGOOptions::IRUse))
addPGOInstrPassesForO0(
MPM,
/* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
/* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
for (auto &C : PipelineStartEPCallbacks)
C(MPM, Level);
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
for (auto &C : PipelineEarlySimplificationEPCallbacks)
C(MPM, Level);
// Build a minimal pipeline based on the semantics required by LLVM,
// which is just that always inlining occurs. Further, disable generating
// lifetime intrinsics to avoid enabling further optimizations during
// code generation.
MPM.addPass(AlwaysInlinerPass(
/*InsertLifetimeIntrinsics=*/false));
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
if (EnableMatrix)
MPM.addPass(
createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
if (!CGSCCOptimizerLateEPCallbacks.empty()) {
CGSCCPassManager CGPM;
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(CGPM, Level);
if (!CGPM.isEmpty())
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
}
if (!LateLoopOptimizationsEPCallbacks.empty()) {
LoopPassManager LPM;
for (auto &C : LateLoopOptimizationsEPCallbacks)
C(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
}
}
if (!LoopOptimizerEndEPCallbacks.empty()) {
LoopPassManager LPM;
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
}
}
if (!ScalarOptimizerLateEPCallbacks.empty()) {
FunctionPassManager FPM;
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
if (!VectorizerStartEPCallbacks.empty()) {
FunctionPassManager FPM;
for (auto &C : VectorizerStartEPCallbacks)
C(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
CGSCCPassManager CGPM;
CGPM.addPass(CoroSplitPass());
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
for (auto &C : OptimizerLastEPCallbacks)
C(MPM, Level);
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}
AAManager PassBuilder::buildDefaultAAPipeline() {
AAManager AA;
// The order in which these are registered determines their priority when
// being queried.
// First we register the basic alias analysis that provides the majority of
// per-function local AA logic. This is a stateless, on-demand local set of
// AA techniques.
AA.registerFunctionAnalysis<BasicAA>();
// Next we query fast, specialized alias analyses that wrap IR-embedded
// information about aliasing.
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
// Add support for querying global aliasing information when available.
// Because the `AAManager` is a function analysis and `GlobalsAA` is a module
// analysis, all that the `AAManager` can do is query for any *cached*
// results from `GlobalsAA` through a readonly proxy.
AA.registerModuleAnalysis<GlobalsAA>();
// Add target-specific alias analyses.
if (TM)
TM->registerDefaultAliasAnalyses(AA);
return AA;
}
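// Illustrative sketch (not part of the original file): to make this the AA
// pipeline that is actually used, a client registers it with the function
// analysis manager before calling registerFunctionAnalyses, since the first
// registration of AAManager wins:
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
//   PB.registerFunctionAnalyses(FAM);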
static Optional<int> parseRepeatPassName(StringRef Name) {
if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
return None;
int Count;
if (Name.getAsInteger(0, Count) || Count <= 0)
return None;
return Count;
}
static Optional<int> parseDevirtPassName(StringRef Name) {
if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
return None;
int Count;
if (Name.getAsInteger(0, Count) || Count < 0)
return None;
return Count;
}
static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
if (!Name.consume_front(PassName))
return false;
// normal pass name w/o parameters == default parameters
if (Name.empty())
return true;
return Name.startswith("<") && Name.endswith(">");
}
namespace {
/// This performs customized parsing of a pass name with parameters.
///
/// We do not need parametrization of passes in a textual pipeline very often,
/// yet on rare occasions the ability to specify parameters right there can be
/// useful.
///
/// \p Name - parameterized specification of a pass from a textual pipeline
/// is a string of the form:
/// PassName '<' parameter-list '>'
///
/// The parameter list is parsed by the \p Parser callable argument.
/// It takes a StringRef of parameters and returns either a StringError or a
/// parameter list in the form of a custom parameters type, all wrapped into
/// the Expected<> template class.
///
template <typename ParametersParseCallableT>
auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
StringRef PassName) -> decltype(Parser(StringRef{})) {
using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
StringRef Params = Name;
if (!Params.consume_front(PassName)) {
assert(false &&
"unable to strip pass name from parametrized pass specification");
}
if (!Params.empty() &&
(!Params.consume_front("<") || !Params.consume_back(">"))) {
assert(false && "invalid format for parametrized pass name");
}
Expected<ParametersT> Result = Parser(Params);
assert((Result || Result.template errorIsA<StringError>()) &&
"Pass parameter parser can only return StringErrors.");
return Result;
}
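// Illustrative example (not part of the original file): with this scheme a
// textual pipeline can carry pass parameters directly, using the names
// accepted by the parsers below, e.g.:
//
//   ModulePassManager MPM;
//   if (Error Err = PB.parsePassPipeline(
//           MPM, "function(simplifycfg<hoist-common-insts;no-keep-loops>)"))
//     errs() << toString(std::move(Err)) << "\n";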
/// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
LoopUnrollOptions UnrollOpts;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
int OptLevel = StringSwitch<int>(ParamName)
.Case("O0", 0)
.Case("O1", 1)
.Case("O2", 2)
.Case("O3", 3)
.Default(-1);
if (OptLevel >= 0) {
UnrollOpts.setOptLevel(OptLevel);
continue;
}
if (ParamName.consume_front("full-unroll-max=")) {
int Count;
if (ParamName.getAsInteger(0, Count))
return make_error<StringError>(
formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
UnrollOpts.setFullUnrollMaxCount(Count);
continue;
}
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "partial") {
UnrollOpts.setPartial(Enable);
} else if (ParamName == "peeling") {
UnrollOpts.setPeeling(Enable);
} else if (ParamName == "profile-peeling") {
UnrollOpts.setProfileBasedPeeling(Enable);
} else if (ParamName == "runtime") {
UnrollOpts.setRuntime(Enable);
} else if (ParamName == "upperbound") {
UnrollOpts.setUpperBound(Enable);
} else {
return make_error<StringError>(
formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return UnrollOpts;
}
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
if (ParamName == "recover") {
Result.Recover = true;
} else if (ParamName == "kernel") {
Result.Kernel = true;
} else if (ParamName.consume_front("track-origins=")) {
if (ParamName.getAsInteger(0, Result.TrackOrigins))
return make_error<StringError>(
formatv("invalid argument to MemorySanitizer pass track-origins "
"parameter: '{0}' ",
ParamName)
.str(),
inconvertibleErrorCode());
} else {
return make_error<StringError>(
formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
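// Example: "msan<recover;track-origins=2>" (registry name "msan" assumed)
// enables error recovery and origin tracking at level 2.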
/// Parser of parameters for SimplifyCFG pass.
Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
SimplifyCFGOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "forward-switch-cond") {
Result.forwardSwitchCondToPhi(Enable);
} else if (ParamName == "switch-to-lookup") {
Result.convertSwitchToLookupTable(Enable);
} else if (ParamName == "keep-loops") {
Result.needCanonicalLoops(Enable);
} else if (ParamName == "hoist-common-insts") {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
APInt BonusInstThreshold;
if (ParamName.getAsInteger(0, BonusInstThreshold))
return make_error<StringError>(
formatv("invalid argument to SimplifyCFG pass bonus-threshold "
"parameter: '{0}' ",
ParamName).str(),
inconvertibleErrorCode());
Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
} else {
return make_error<StringError>(
formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
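// Example: "simplifycfg<bonus-inst-threshold=2;no-keep-loops>" (registry name
// "simplifycfg" assumed) sets the bonus instruction threshold to 2 and stops
// requiring canonical loop form to be preserved.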
/// Parser of parameters for LoopVectorize pass.
Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
LoopVectorizeOptions Opts;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "interleave-forced-only") {
Opts.setInterleaveOnlyWhenForced(Enable);
} else if (ParamName == "vectorize-forced-only") {
Opts.setVectorizeOnlyWhenForced(Enable);
} else {
return make_error<StringError>(
formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Opts;
}
Expected<std::pair<bool, bool>> parseLoopUnswitchOptions(StringRef Params) {
std::pair<bool, bool> Result = {false, true};
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "nontrivial") {
Result.first = Enable;
} else if (ParamName == "trivial") {
Result.second = Enable;
} else {
return make_error<StringError>(
formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
bool Result = false;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "split-footer-bb") {
Result = Enable;
} else {
return make_error<StringError>(
formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ",
ParamName)
.str(),
inconvertibleErrorCode());
}
}
return Result;
}
Expected<GVNOptions> parseGVNOptions(StringRef Params) {
GVNOptions Result;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
if (ParamName == "pre") {
Result.setPRE(Enable);
} else if (ParamName == "load-pre") {
Result.setLoadPRE(Enable);
} else if (ParamName == "split-backedge-load-pre") {
Result.setLoadPRESplitBackedge(Enable);
} else if (ParamName == "memdep") {
Result.setMemDep(Enable);
} else {
return make_error<StringError>(
formatv("invalid GVN pass parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
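// Example: "gvn<no-load-pre;no-memdep>" (registry name "gvn" assumed) keeps
// scalar PRE enabled but turns off load PRE and MemDep-based dependence
// queries.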
Expected<StackLifetime::LivenessType>
parseStackLifetimeOptions(StringRef Params) {
StackLifetime::LivenessType Result = StackLifetime::LivenessType::May;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
if (ParamName == "may") {
Result = StackLifetime::LivenessType::May;
} else if (ParamName == "must") {
Result = StackLifetime::LivenessType::Must;
} else {
return make_error<StringError>(
formatv("invalid StackLifetime parameter '{0}' ", ParamName).str(),
inconvertibleErrorCode());
}
}
return Result;
}
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
/// alias.
static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
return Name.startswith("default") || Name.startswith("thinlto") ||
Name.startswith("lto");
}
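// Aliases matched by this prefix check (and later validated against
// DefaultAliasRegex) look like, e.g.:
//   "default<O2>", "thinlto-pre-link<Os>", "thinlto<O3>",
//   "lto-pre-link<O1>", "lto<O3>"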
/// Tests whether registered callbacks will accept a given pass name.
///
/// When parsing a pipeline text, the type of the outermost pipeline may be
/// omitted, in which case the type is automatically determined from the first
/// pass name in the text. This may be a name that is handled through one of the
/// callbacks. We check this through the ordinary parsing callbacks by setting
/// up a dummy PassManager in order to not force the client to also handle this
/// type of query.
template <typename PassManagerT, typename CallbacksT>
static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
if (!Callbacks.empty()) {
PassManagerT DummyPM;
for (auto &CB : Callbacks)
if (CB(Name, DummyPM, {}))
return true;
}
return false;
}
template <typename CallbacksT>
static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name))
return DefaultAliasRegex.match(Name);
// Explicitly handle pass manager names.
if (Name == "module")
return true;
if (Name == "cgscc")
return true;
if (Name == "function")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "cgscc")
return true;
if (Name == "function")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
if (parseDevirtPassName(Name))
return true;
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "function")
return true;
if (Name == "loop" || Name == "loop-mssa")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) \
return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
}
template <typename CallbacksT>
static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "loop" || Name == "loop-mssa")
return true;
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) \
return true;
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
#include "PassRegistry.def"
return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
}
Optional<std::vector<PassBuilder::PipelineElement>>
PassBuilder::parsePipelineText(StringRef Text) {
std::vector<PipelineElement> ResultPipeline;
SmallVector<std::vector<PipelineElement> *, 4> PipelineStack = {
&ResultPipeline};
for (;;) {
std::vector<PipelineElement> &Pipeline = *PipelineStack.back();
size_t Pos = Text.find_first_of(",()");
Pipeline.push_back({Text.substr(0, Pos), {}});
// If we have a single terminating name, we're done.
if (Pos == Text.npos)
break;
char Sep = Text[Pos];
Text = Text.substr(Pos + 1);
if (Sep == ',')
// Just a name ending in a comma, continue.
continue;
if (Sep == '(') {
// Push the inner pipeline onto the stack to continue processing.
PipelineStack.push_back(&Pipeline.back().InnerPipeline);
continue;
}
assert(Sep == ')' && "Bogus separator!");
// When handling close parentheses, we greedily consume them to avoid
// empty strings in the pipeline.
do {
// If we try to pop the outer pipeline we have unbalanced parentheses.
if (PipelineStack.size() == 1)
return None;
PipelineStack.pop_back();
} while (Text.consume_front(")"));
// Check if we've finished parsing.
if (Text.empty())
break;
// Otherwise, the end of an inner pipeline always has to be followed by
// a comma, and then we can continue.
if (!Text.consume_front(","))
return None;
}
if (PipelineStack.size() > 1)
// Unbalanced parentheses.
return None;
assert(PipelineStack.back() == &ResultPipeline &&
"Wrong pipeline at the bottom of the stack!");
return {std::move(ResultPipeline)};
}
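// Illustrative decomposition (pass names are placeholders): the text
//   "cgscc(function(instcombine),inline)"
// yields one top-level element "cgscc" whose InnerPipeline contains
// "function" (itself wrapping "instcombine") followed by "inline".
// Unbalanced parentheses, or a ')' followed by anything other than ',' or the
// end of the text, cause None to be returned.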
Error PassBuilder::parseModulePass(ModulePassManager &MPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "module") {
ModulePassManager NestedMPM;
if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(std::move(NestedMPM));
return Error::success();
}
if (Name == "cgscc") {
CGSCCPassManager CGPM;
if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
ModulePassManager NestedMPM;
if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline))
return Err;
MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
return Error::success();
}
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
inconvertibleErrorCode());
}
// Manually handle aliases for pre-configured pipeline fragments.
if (startsWithDefaultPipelineAliasPrefix(Name)) {
SmallVector<StringRef, 3> Matches;
if (!DefaultAliasRegex.match(Name, &Matches))
return make_error<StringError>(
formatv("unknown default pipeline alias '{0}'", Name).str(),
inconvertibleErrorCode());
assert(Matches.size() == 3 && "Must capture two matched strings!");
OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
.Case("O0", OptimizationLevel::O0)
.Case("O1", OptimizationLevel::O1)
.Case("O2", OptimizationLevel::O2)
.Case("O3", OptimizationLevel::O3)
.Case("Os", OptimizationLevel::Os)
.Case("Oz", OptimizationLevel::Oz);
if (L == OptimizationLevel::O0 && Matches[1] != "thinlto" &&
Matches[1] != "lto") {
MPM.addPass(buildO0DefaultPipeline(L, Matches[1] == "thinlto-pre-link" ||
Matches[1] == "lto-pre-link"));
return Error::success();
}
// This is consistent with old pass manager invoked via opt, but
// inconsistent with clang. Clang doesn't enable loop vectorization
// but does enable slp vectorization at Oz.
PTO.LoopVectorization =
L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
PTO.SLPVectorization =
L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
if (Matches[1] == "default") {
MPM.addPass(buildPerModuleDefaultPipeline(L));
} else if (Matches[1] == "thinlto-pre-link") {
MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L));
} else if (Matches[1] == "thinlto") {
MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
} else if (Matches[1] == "lto-pre-link") {
MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
MPM.addPass(buildLTODefaultPipeline(L, nullptr));
}
return Error::success();
}
// Finally expand the basic registered passes from the .inc file.
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
MPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
MPM.addPass( \
createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
CREATE_PASS(Params.get()), false, false))); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : ModulePipelineParsingCallbacks)
if (C(Name, MPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown module pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "cgscc") {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(std::move(NestedCGPM));
return Error::success();
}
if (Name == "function") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
return Error::success();
}
if (auto MaxRepetitions = parseDevirtPassName(Name)) {
CGSCCPassManager NestedCGPM;
if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline))
return Err;
CGPM.addPass(
createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
return Error::success();
}
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
CGPM.addPass(RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, \
LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \
CGSCCUpdateResult &>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
CGPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
CGPM.addPass( \
createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \
CREATE_PASS(Params.get()), false, false))); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : CGSCCPipelineParsingCallbacks)
if (C(Name, CGPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown cgscc pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
const PipelineElement &E) {
auto &Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "function") {
FunctionPassManager NestedFPM;
if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(std::move(NestedFPM));
return Error::success();
}
if (Name == "loop" || Name == "loop-mssa") {
LoopPassManager LPM;
if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
bool UseMemorySSA = (Name == "loop-mssa");
bool UseBFI = llvm::any_of(
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
UseBFI));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
FunctionPassManager NestedFPM;
if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline))
return Err;
FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
return Error::success();
}
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as function pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
FPM.addPass(CREATE_PASS(Params.get())); \
return Error::success(); \
}
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
FPM.addPass( \
RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
FPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
// FIXME: UseMemorySSA is set to false. Maybe we could do things like:
// bool UseMemorySSA = !("canon-freeze" || "loop-predication" ||
// "guard-widening");
// The risk is that it may become obsolete if we're not careful.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \
false, false)); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : FunctionPipelineParsingCallbacks)
if (C(Name, FPM, InnerPipeline))
return Error::success();
return make_error<StringError>(
formatv("unknown function pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
Error PassBuilder::parseLoopPass(LoopPassManager &LPM,
const PipelineElement &E) {
StringRef Name = E.Name;
auto &InnerPipeline = E.InnerPipeline;
// First handle complex passes like the pass managers which carry pipelines.
if (!InnerPipeline.empty()) {
if (Name == "loop") {
LoopPassManager NestedLPM;
if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
LPM.addPass(std::move(NestedLPM));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
LoopPassManager NestedLPM;
if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline))
return Err;
LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
return Error::success();
}
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
return Error::success();
// Normal passes can't have pipelines.
return make_error<StringError>(
formatv("invalid use of '{0}' pass as loop pipeline", Name).str(),
inconvertibleErrorCode());
}
// Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
return Error::success(); \
}
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
if (checkParametrizedPassName(Name, NAME)) { \
auto Params = parsePassParameters(PARSER, Name, NAME); \
if (!Params) \
return Params.takeError(); \
LPM.addPass(CREATE_PASS(Params.get())); \
return Error::success(); \
}
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
LPM.addPass(RequireAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type, Loop, \
LoopAnalysisManager, LoopStandardAnalysisResults &, \
LPMUpdater &>()); \
return Error::success(); \
} \
if (Name == "invalidate<" NAME ">") { \
LPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return Error::success(); \
}
#include "PassRegistry.def"
for (auto &C : LoopPipelineParsingCallbacks)
if (C(Name, LPM, InnerPipeline))
return Error::success();
return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(),
inconvertibleErrorCode());
}
bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (Name == NAME) { \
AA.registerModuleAnalysis< \
std::remove_reference<decltype(CREATE_PASS)>::type>(); \
return true; \
}
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (Name == NAME) { \
AA.registerFunctionAnalysis< \
std::remove_reference<decltype(CREATE_PASS)>::type>(); \
return true; \
}
#include "PassRegistry.def"
for (auto &C : AAParsingCallbacks)
if (C(Name, AA))
return true;
return false;
}
Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseLoopPass(LPM, Element))
return Err;
}
return Error::success();
}
Error PassBuilder::parseFunctionPassPipeline(
FunctionPassManager &FPM, ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseFunctionPass(FPM, Element))
return Err;
}
return Error::success();
}
Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseCGSCCPass(CGPM, Element))
return Err;
}
return Error::success();
}
void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
FunctionAnalysisManager &FAM,
CGSCCAnalysisManager &CGAM,
ModuleAnalysisManager &MAM) {
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); });
CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); });
FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); });
LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); });
}
Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
ArrayRef<PipelineElement> Pipeline) {
for (const auto &Element : Pipeline) {
if (auto Err = parseModulePass(MPM, Element))
return Err;
}
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c ModulePassManager
// FIXME: Should this routine accept a TargetMachine or require the caller to
// pre-populate the analysis managers with target-specific stuff?
Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
// If the first name isn't at the module layer, wrap the pipeline up
// automatically.
StringRef FirstName = Pipeline->front().Name;
if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
Pipeline = {{"cgscc", std::move(*Pipeline)}};
} else if (isFunctionPassName(FirstName,
FunctionPipelineParsingCallbacks)) {
Pipeline = {{"function", std::move(*Pipeline)}};
} else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline))
return Error::success();
// Unknown pass or pipeline name!
auto &InnerPipeline = Pipeline->front().InnerPipeline;
return make_error<StringError>(
formatv("unknown {0} name '{1}'",
(InnerPipeline.empty() ? "pass" : "pipeline"), FirstName)
.str(),
inconvertibleErrorCode());
}
}
if (auto Err = parseModulePassPipeline(MPM, *Pipeline))
return Err;
return Error::success();
}
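// Illustrative driver for this entry point, a minimal sketch assuming the
// usual PassBuilder analysis-registration helpers (registerModuleAnalyses()
// and friends) from upstream LLVM:
//
//   PassBuilder PB;
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//
//   ModulePassManager MPM;
//   if (Error Err = PB.parsePassPipeline(MPM, "default<O2>"))
//     errs() << toString(std::move(Err)) << "\n";
//   else
//     MPM.run(M, MAM);   // M is the llvm::Module being optimized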
// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
return make_error<StringError>(
formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName,
PipelineText)
.str(),
inconvertibleErrorCode());
if (auto Err = parseCGSCCPassPipeline(CGPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c
// FunctionPassManager
Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
StringRef FirstName = Pipeline->front().Name;
if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
return make_error<StringError>(
formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName,
PipelineText)
.str(),
inconvertibleErrorCode());
if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline))
return Err;
return Error::success();
}
// Primary pass pipeline description parsing routine for a \c LoopPassManager
Error PassBuilder::parsePassPipeline(LoopPassManager &LPM,
StringRef PipelineText) {
auto Pipeline = parsePipelineText(PipelineText);
if (!Pipeline || Pipeline->empty())
return make_error<StringError>(
formatv("invalid pipeline '{0}'", PipelineText).str(),
inconvertibleErrorCode());
if (auto Err = parseLoopPassPipeline(LPM, *Pipeline))
return Err;
return Error::success();
}
Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
// If the pipeline just consists of the word 'default' just replace the AA
// manager with our default one.
if (PipelineText == "default") {
AA = buildDefaultAAPipeline();
return Error::success();
}
while (!PipelineText.empty()) {
StringRef Name;
std::tie(Name, PipelineText) = PipelineText.split(',');
if (!parseAAPassName(AA, Name))
return make_error<StringError>(
formatv("unknown alias analysis name '{0}'", Name).str(),
inconvertibleErrorCode());
}
return Error::success();
}
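// Example: parseAAPipeline(AA, "basic-aa,scoped-noalias-aa,tbaa") registers
// each analysis in turn (names assumed to match the *_ALIAS_ANALYSIS entries
// in PassRegistry.def), while "default" installs buildDefaultAAPipeline().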
bool PassBuilder::isAAPassName(StringRef PassName) {
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#include "PassRegistry.def"
return false;
}
bool PassBuilder::isAnalysisPassName(StringRef PassName) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
if (PassName == NAME) \
return true;
#include "PassRegistry.def"
return false;
}
static void printPassName(StringRef PassName, raw_ostream &OS) {
OS << " " << PassName << "\n";
}
static void printPassName(StringRef PassName, StringRef Params,
raw_ostream &OS) {
OS << " " << PassName << "<" << Params << ">\n";
}
void PassBuilder::printPassNames(raw_ostream &OS) {
// TODO: print pass descriptions when they are available
OS << "Module passes:\n";
#define MODULE_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Module analyses:\n";
#define MODULE_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Module alias analyses:\n";
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "CGSCC passes:\n";
#define CGSCC_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "CGSCC analyses:\n";
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function passes:\n";
#define FUNCTION_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function passes with params:\n";
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
printPassName(NAME, PARAMS, OS);
#include "PassRegistry.def"
OS << "Function analyses:\n";
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Function alias analyses:\n";
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Loop passes:\n";
#define LOOP_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
OS << "Loop passes with params:\n";
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
printPassName(NAME, PARAMS, OS);
#include "PassRegistry.def"
OS << "Loop analyses:\n";
#define LOOP_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
}
void PassBuilder::registerParseTopLevelPipelineCallback(
const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>)>
&C) {
TopLevelPipelineParsingCallbacks.push_back(C);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b27a02b8c182..60c00f47859b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,18751 +1,18753 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
cl::desc("Enable AArch64 logical imm instruction "
"optimization"),
cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in the future, once both implementations are based on MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
cl::desc("Combine extends of AArch64 masked "
"gather intrinsics"),
cl::init(true));
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for vector");
case MVT::i8:
return MVT::nxv16i8;
case MVT::i16:
return MVT::nxv8i16;
case MVT::i32:
return MVT::nxv4i32;
case MVT::i64:
return MVT::nxv2i64;
case MVT::f16:
return MVT::nxv8f16;
case MVT::f32:
return MVT::nxv4f32;
case MVT::f64:
return MVT::nxv2f64;
case MVT::bf16:
return MVT::nxv8bf16;
}
}
// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
switch (EC.getKnownMinValue()) {
default:
llvm_unreachable("unexpected element count for vector");
case 16:
return MVT::nxv16i8;
case 8:
return MVT::nxv8i16;
case 4:
return MVT::nxv4i32;
case 2:
return MVT::nxv2i64;
}
}
static inline EVT getPromotedVTForPredicate(EVT VT) {
assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
"Expected scalable predicate vector type!");
switch (VT.getVectorMinNumElements()) {
default:
llvm_unreachable("unexpected element count for vector");
case 2:
return MVT::nxv2i64;
case 4:
return MVT::nxv4i32;
case 8:
return MVT::nxv8i16;
case 16:
return MVT::nxv16i8;
}
}
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal vector type!");
return VT.isFixedLengthVector() ||
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::ABS_MERGE_PASSTHRU:
case AArch64ISD::NEG_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
case AArch64ISD::FRINT_MERGE_PASSTHRU:
case AArch64ISD::FROUND_MERGE_PASSTHRU:
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
case AArch64ISD::FRECPX_MERGE_PASSTHRU:
case AArch64ISD::FABS_MERGE_PASSTHRU:
return true;
}
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to all-one or all-zero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasLS64()) {
addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
setOperationAction(ISD::STORE, MVT::i64x8, Custom);
}
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
if (Subtarget->hasBF16())
addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
}
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variable-sized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have i32 MULH{S|U}.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
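// Without the FullFP16 extension the only scalar f16 operations available are
// conversions to and from f32/f64, so scalar f16 arithmetic is promoted to
// f32 and the f16 vector forms are expanded.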
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
setOperationAction(ISD::SETCC, MVT::f16, Promote);
setOperationAction(ISD::BR_CC, MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FMA, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FSQRT, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// Promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
setOperationAction(ISD::LROUND, Ty, Legal);
setOperationAction(ISD::LLROUND, Ty, Legal);
setOperationAction(ISD::LRINT, Ty, Legal);
setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Generate outline atomics library calls only if LSE was not specified for
// the subtarget.
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
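// The macros below register the runtime entry-point names for the outline
// atomic helpers, e.g. LCALLNAMES(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas, 4)
// produces __aarch64_cas4_relax/_acq/_rel/_acq_rel; the numeric suffix is the
// access size in bytes and the trailing suffix the memory ordering.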
#define LCALLNAMES(A, B, N) \
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
setLibcallName(A##N##_REL, #B #N "_rel"); \
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) \
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
}
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
setOperationAction(ISD::STORE, MVT::v32i8, Custom);
setOperationAction(ISD::STORE, MVT::v16i16, Custom);
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v8f32, Custom);
setOperationAction(ISD::STORE, MVT::v4f64, Custom);
setOperationAction(ISD::STORE, MVT::v4i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floating-point extending loads, i1 sign-extending
// loads, floating-point truncating stores, or v2i32->v2i16 truncating stores.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::bf16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
// TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
// Try to combine setcc with csel.
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::VECTOR_SPLICE);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::STEP_VECTOR);
setTargetDAGCombine(ISD::GlobalAddress);
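// The Max*PerMem* thresholds below bound how many inline load/store
// operations the generic lowering may emit when expanding
// memset/memcpy/memmove/memcmp before falling back to a library call.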
// In case of strict alignment, avoid an excessive number of byte-wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset = Subtarget->requiresStrictAlign()
? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the subtarget, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
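// AArch64 has bitfield extract instructions (SBFX/UBFX); advertising this
// presumably keeps generic combines from rewriting shift-and-mask sequences
// into forms those instructions cannot match.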
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
setOperationAction(ISD::FABS, MVT::v1f64, Expand);
setOperationAction(ISD::FADD, MVT::v1f64, Expand);
setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
setOperationAction(ISD::FMA, MVT::v1f64, Expand);
setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Likewise, there is no direct i32 -> f16 vector conversion instruction; set
// it to Custom so the conversion happens in two steps: v4i32 -> v4f32 -> v4f16.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have FullFP16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
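// Absolute-difference operations map directly onto the NEON SABD/UABD
// instructions for these types.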
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
if (Subtarget->hasSVE())
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
}
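// SVE: most operations on legal scalable vector types are custom-lowered onto
// predicated AArch64ISD nodes (e.g. AArch64ISD::MUL_PRED).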
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
// NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
// Avoid marking truncating FP stores as legal to prevent the
// DAGCombiner from creating unsupported truncating stores.
setTruncStoreAction(VT, InnerVT, Expand);
// SVE does not have floating-point extending loads.
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// SVE supports truncating stores of 64 and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
}
setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
// A 64-bit vector result can come from an input wider than a NEON register.
for (auto VT : {MVT::v8i8, MVT::v4i16})
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
// A 128-bit vector result implies an input wider than a NEON register.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
// FP operations with no NEON support.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
MVT::v1f64, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
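// On subtargets where a conditional select is costlier than a well-predicted
// branch, allow the generic code to turn predictable selects into branches.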
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
assert(VT.isVector() && "VT should be a vector type");
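// Floating-point vector loads and stores are promoted to the same-width
// integer vector type, so only the integer load/store patterns are needed.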
if (VT.isFloatingPoint()) {
MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
}
// Mark vector float intrinsics as expand.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// But we do support custom-lowering for FCOPYSIGN.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// CNT supports only B element sizes; wider element types are custom-lowered
// using CNT followed by UADDLP to widen the byte counts.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
}
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
// Mark integer truncating stores as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
}
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
}
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
// Return if the immediate is already all zeros, all ones, a bimm32 or a
// bimm64.
if (Imm == 0 || Imm == Mask ||
AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
return false;
unsigned EltSize = Size;
uint64_t DemandedBits = Demanded.getZExtValue();
// Clear bits that are not demanded.
Imm &= DemandedBits;
while (true) {
// The goal here is to set the non-demanded bits in a way that minimizes
// the number of transitions between 0 and 1. To achieve this, we set each
// non-demanded bit to the value of the preceding demanded bit.
// For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
// non-demanded bit), we copy bit0 (1) to the least significant 'x',
// bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
// The final result is 0b11000011.
uint64_t NonDemandedBits = ~DemandedBits;
uint64_t InvertedImm = ~Imm & DemandedBits;
uint64_t RotatedImm =
((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
NonDemandedBits;
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;
// If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
// or all-ones or all-zeros, in which case we can stop searching. Otherwise,
// we halve the element size and continue the search.
if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
break;
// We cannot shrink the element size any further if it is 2 bits.
if (EltSize == 2)
return false;
EltSize /= 2;
Mask >>= EltSize;
uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
// Return if there is a mismatch in any of the demanded bits of Imm and Hi.
if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
return false;
// Merge the upper and lower halves of Imm and DemandedBits.
Imm |= Hi;
DemandedBits |= DemandedBitsHi;
}
++NumOptimizedImms;
// Replicate the element across the register width.
while (EltSize < Size) {
NewImm |= NewImm << EltSize;
EltSize *= 2;
}
(void)OldImm;
assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
"demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
New = SDValue(
TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
}
return TLO.CombineTo(Op, New);
}
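// Try to replace the immediate of a logical operation with a simpler value by
// exploiting bits the caller does not demand, so that it encodes as an
// AArch64 bitmask immediate (see optimizeLogicalImm above).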
bool AArch64TargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization until as late as possible.
if (!TLO.LegalOps)
return false;
if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
unsigned Size = VT.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
switch (Op.getOpcode()) {
default:
return false;
case ISD::AND:
NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
break;
case ISD::OR:
NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
break;
case ISD::XOR:
NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the address space.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero-extend the result, so we can mark all bits above the element
// width as known zero. 32-bit or larger elements don't need this, as those
// are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
Known.Zero |= Mask;
} else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
Known.Zero |= Mask;
}
break;
} break;
}
}
}
}
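// Scalar shift amounts are always represented as i64, regardless of the type
// being shifted.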
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
}
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;
}
return true;
}
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
Ty.getSizeInBytes() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
Ty == LLT::fixed_vector(2, 64);
}
return true;
}
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return AArch64::createFastISel(funcInfo, libInfo);
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
MAKE_CASE(AArch64ISD::CALL)
MAKE_CASE(AArch64ISD::ADRP)
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
MAKE_CASE(AArch64ISD::RET_FLAG)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)
MAKE_CASE(AArch64ISD::CCMP)
MAKE_CASE(AArch64ISD::CCMN)
MAKE_CASE(AArch64ISD::FCCMP)
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
MAKE_CASE(AArch64ISD::MOVImsl)
MAKE_CASE(AArch64ISD::FMOV)
MAKE_CASE(AArch64ISD::MVNIshift)
MAKE_CASE(AArch64ISD::MVNImsl)
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
MAKE_CASE(AArch64ISD::UZP2)
MAKE_CASE(AArch64ISD::TRN1)
MAKE_CASE(AArch64ISD::TRN2)
MAKE_CASE(AArch64ISD::REV16)
MAKE_CASE(AArch64ISD::REV32)
MAKE_CASE(AArch64ISD::REV64)
MAKE_CASE(AArch64ISD::EXT)
MAKE_CASE(AArch64ISD::SPLICE)
MAKE_CASE(AArch64ISD::VSHL)
MAKE_CASE(AArch64ISD::VLSHR)
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
MAKE_CASE(AArch64ISD::CMEQ)
MAKE_CASE(AArch64ISD::CMGE)
MAKE_CASE(AArch64ISD::CMGT)
MAKE_CASE(AArch64ISD::CMHI)
MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
MAKE_CASE(AArch64ISD::CMEQz)
MAKE_CASE(AArch64ISD::CMGEz)
MAKE_CASE(AArch64ISD::CMGTz)
MAKE_CASE(AArch64ISD::CMLEz)
MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
MAKE_CASE(AArch64ISD::FCMLEz)
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SRHADD)
MAKE_CASE(AArch64ISD::URHADD)
MAKE_CASE(AArch64ISD::SHADD)
MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
MAKE_CASE(AArch64ISD::SADDV_PRED)
MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
MAKE_CASE(AArch64ISD::UMINV_PRED)
MAKE_CASE(AArch64ISD::ORV_PRED)
MAKE_CASE(AArch64ISD::EORV_PRED)
MAKE_CASE(AArch64ISD::ANDV_PRED)
MAKE_CASE(AArch64ISD::CLASTA_N)
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::LS64_BUILD)
MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAX_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMIN_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
MAKE_CASE(AArch64ISD::TBZ)
MAKE_CASE(AArch64ISD::TBNZ)
MAKE_CASE(AArch64ISD::TC_RETURN)
MAKE_CASE(AArch64ISD::PREFETCH)
MAKE_CASE(AArch64ISD::SITOF)
MAKE_CASE(AArch64ISD::UITOF)
MAKE_CASE(AArch64ISD::NVCAST)
MAKE_CASE(AArch64ISD::MRS)
MAKE_CASE(AArch64ISD::SQSHL_I)
MAKE_CASE(AArch64ISD::UQSHL_I)
MAKE_CASE(AArch64ISD::SRSHR_I)
MAKE_CASE(AArch64ISD::URSHR_I)
MAKE_CASE(AArch64ISD::SQSHLU_I)
MAKE_CASE(AArch64ISD::WrapperLarge)
MAKE_CASE(AArch64ISD::LD2post)
MAKE_CASE(AArch64ISD::LD3post)
MAKE_CASE(AArch64ISD::LD4post)
MAKE_CASE(AArch64ISD::ST2post)
MAKE_CASE(AArch64ISD::ST3post)
MAKE_CASE(AArch64ISD::ST4post)
MAKE_CASE(AArch64ISD::LD1x2post)
MAKE_CASE(AArch64ISD::LD1x3post)
MAKE_CASE(AArch64ISD::LD1x4post)
MAKE_CASE(AArch64ISD::ST1x2post)
MAKE_CASE(AArch64ISD::ST1x3post)
MAKE_CASE(AArch64ISD::ST1x4post)
MAKE_CASE(AArch64ISD::LD1DUPpost)
MAKE_CASE(AArch64ISD::LD2DUPpost)
MAKE_CASE(AArch64ISD::LD3DUPpost)
MAKE_CASE(AArch64ISD::LD4DUPpost)
MAKE_CASE(AArch64ISD::LD1LANEpost)
MAKE_CASE(AArch64ISD::LD2LANEpost)
MAKE_CASE(AArch64ISD::LD3LANEpost)
MAKE_CASE(AArch64ISD::LD4LANEpost)
MAKE_CASE(AArch64ISD::ST2LANEpost)
MAKE_CASE(AArch64ISD::ST3LANEpost)
MAKE_CASE(AArch64ISD::ST4LANEpost)
MAKE_CASE(AArch64ISD::SMULL)
MAKE_CASE(AArch64ISD::UMULL)
MAKE_CASE(AArch64ISD::FRECPE)
MAKE_CASE(AArch64ISD::FRECPS)
MAKE_CASE(AArch64ISD::FRSQRTE)
MAKE_CASE(AArch64ISD::FRSQRTS)
MAKE_CASE(AArch64ISD::STG)
MAKE_CASE(AArch64ISD::STZG)
MAKE_CASE(AArch64ISD::ST2G)
MAKE_CASE(AArch64ISD::STZ2G)
MAKE_CASE(AArch64ISD::SUNPKHI)
MAKE_CASE(AArch64ISD::SUNPKLO)
MAKE_CASE(AArch64ISD::UUNPKHI)
MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
}
#undef MAKE_CASE
return nullptr;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a
// phi node:
// OrigBB:
// [... previous instrs leading to comparison ...]
// b.ne TrueBB
// b EndBB
// TrueBB:
// ; Fallthrough
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer rest of current basic-block to EndBB
EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
// TrueBB falls through to the end.
TrueBB->addSuccessor(EndBB);
if (!NZCVKilled) {
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
.addReg(IfTrueReg)
.addMBB(TrueBB)
.addReg(IfFalseReg)
.addMBB(MBB);
MI.eraseFromParent();
return EndBB;
}
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
MachineInstr &MI, MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
return BB;
}
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
#ifndef NDEBUG
MI.dump();
#endif
llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
}
}
//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (ISD::isConstantSplatVectorAllZeros(N))
return true;
if (N->getOpcode() != AArch64ISD::DUP)
return false;
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case ISD::SETNE:
return AArch64CC::NE;
case ISD::SETEQ:
return AArch64CC::EQ;
case ISD::SETGT:
return AArch64CC::GT;
case ISD::SETGE:
return AArch64CC::GE;
case ISD::SETLT:
return AArch64CC::LT;
case ISD::SETLE:
return AArch64CC::LE;
case ISD::SETUGT:
return AArch64CC::HI;
case ISD::SETUGE:
return AArch64CC::HS;
case ISD::SETULT:
return AArch64CC::LO;
case ISD::SETULE:
return AArch64CC::LS;
}
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = AArch64CC::EQ;
break;
case ISD::SETGT:
case ISD::SETOGT:
CondCode = AArch64CC::GT;
break;
case ISD::SETGE:
case ISD::SETOGE:
CondCode = AArch64CC::GE;
break;
case ISD::SETOLT:
CondCode = AArch64CC::MI;
break;
case ISD::SETOLE:
CondCode = AArch64CC::LS;
break;
case ISD::SETONE:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case ISD::SETO:
CondCode = AArch64CC::VC;
break;
case ISD::SETUO:
CondCode = AArch64CC::VS;
break;
case ISD::SETUEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case ISD::SETUGT:
CondCode = AArch64CC::HI;
break;
case ISD::SETUGE:
CondCode = AArch64CC::PL;
break;
case ISD::SETLT:
case ISD::SETULT:
CondCode = AArch64CC::LT;
break;
case ISD::SETLE:
case ISD::SETULE:
CondCode = AArch64CC::LE;
break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = AArch64CC::NE;
break;
}
}
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
assert(CondCode2 == AArch64CC::AL);
break;
case ISD::SETONE:
// (a one b)
// == ((a olt b) || (a ogt b))
// == ((a ord b) && (a une b))
CondCode = AArch64CC::VC;
CondCode2 = AArch64CC::NE;
break;
case ISD::SETUEQ:
// (a ueq b)
// == ((a uno b) || (a oeq b))
// == ((a ule b) && (a uge b))
CondCode = AArch64CC::PL;
CondCode2 = AArch64CC::LE;
break;
}
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2,
bool &Invert) {
Invert = false;
switch (CC) {
default:
// Mostly the scalar mappings work fine.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
break;
case ISD::SETUEQ:
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE:
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
CondCode, CondCode2);
break;
}
}
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
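// Legal immediates are a 12-bit value, optionally shifted left by 12 bits:
// e.g. 0xFFF and 0xFFF000 encode directly, while 0x1001 does not.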
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
LLVM_DEBUG(dbgs() << "Is imm " << C
<< " legal: " << (IsLegal ? "yes\n" : "no\n"));
return IsLegal;
}
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
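// For example, with op2 == 0, SUBS computes op1 - 0 and sets C (no borrow),
// while ADDS computes op1 + 0 and clears C, so unsigned conditions disagree.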
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
SelectionDAG &DAG, SDValue Chain,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
}
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
// SUBS means that it's possible to get CSE with subtract operations.
// A later phase can perform the optimization of setting the destination
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
if (LHS.getOpcode() == ISD::AND) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
DAG.getVTList(VT, MVT_CC),
LHS.getOperand(0),
LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
}
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows us to express arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
/// - We can implement (NEG SETCC) i.e. negating a single comparison by
/// negating the flags used in a CCMP/FCCMP operations.
/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
/// by negating the flags we test for afterwards. i.e.
/// NEG (CMP CCMP CCMP ...) can be implemented.
/// - Note that we can only ever negate all previously processed results.
/// What we can not implement by flipping the flags to test is a negation
/// of two sub-trees (because the negation affects all sub-trees emitted so
/// far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
/// - (OR (SETCC A) (SETCC B)) can be implemented via:
/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
/// - After transforming OR to NEG/AND combinations we may be able to use NEG
/// elimination rules from earlier to implement the whole thing as a
/// CCMP/FCCMP chain.
///
/// As a complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))"
/// can be reassociated to:
/// or (and (setCC (cmp C)) (setCD (cmp D)))
/// (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))"
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can only implement one of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
Opcode = AArch64ISD::FCCMP;
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
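// If the predicate fails, the conditional compare writes NZCV directly;
// choose flag values that satisfy the inverted output condition, so OutCC
// reads as false in that case.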
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
if (Val->getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND || Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If the result of the OR will be negated and we can naturally negate
// the leaves, then this sub-tree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole sub-tree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
return true;
}
return false;
}
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series of compare
/// and conditional compare operations. @returns an NZCV flags producing node
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
/// the transformation was not possible.
/// \p Negate is true if we want this sub-tree to be negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
OutCC = changeIntCCToAArch64CC(CC);
} else {
assert(LHS.getValueType().isFloatingPoint());
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
SDValue ExtraCmp;
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
ExtraCC, DL, DAG);
CCOp = ExtraCmp;
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp)
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
DAG);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == ISD::OR;
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == ISD::OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the left sub-tree if possible, otherwise negate the result.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees.
AArch64CC::CondCode RHSCC;
SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
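/// The result is a rough score: 2 when an extend plus a small shift can both
/// fold, 1 when a single extend or constant shift can fold, 0 otherwise.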
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
if (!isLegalArithImmed(C)) {
// Constant does not fit, try adjusting it by one?
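// For example, "x < 0x1001" has an unencodable immediate but can become
// "x <= 0x1000", which encodes as a shifted 12-bit value.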
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
if ((VT == MVT::i32 && C != 0x80000000 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0x80000000ULL &&
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if ((VT == MVT::i32 && C != 0 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
// For the i8 operand, the largest immediate is 255, so this can be easily
// encoded in the compare instruction. For the i16 operand, however, the
// largest immediate cannot be encoded in the compare.
// Therefore, use a sign extending load and cmn to avoid materializing the
// -1 constant. For example,
// movz w1, #65535
// ldrh w0, [x0, #0]
// cmp w0, w1
// >
// ldrsh w0, [x0, #0]
// cmn w0, #1
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
// if and only if (sext LHS) == (sext RHS). The checks are in place to
// ensure both the LHS and RHS are truly zero extended and to make sure the
// transformation is profitable.
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
}
if (!Cmp) {
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
"Unsupported value type");
SDValue Value, Overflow;
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::VS;
break;
case ISD::UADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::HS;
break;
case ISD::SSUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::VS;
break;
case ISD::USUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::LO;
break;
// Multiply needs a little bit of extra work.
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
// Extend to 64-bits, then perform a 64-bit multiply.
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
} else {
// tst xreg, #0xffffffff00000000
SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
Overflow =
DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
}
break;
}
assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
// For the 64 bit multiply
Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
}
} // switch (...)
if (Opc) {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
// If the operand is an overflow checking operation, invert the condition
// code and kill the Not operation. I.e., transform:
// (xor (overflow_op_bool, 1))
// -->
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
if (Sel.getOpcode() != ISD::SELECT_CC)
return Op;
// The folding we want to perform is:
// (xor x, (select_cc a, b, cc, 0, -1) )
// -->
// (csel x, (xor x, -1), cc ...)
//
// The latter will get matched to a CSINV instruction.
ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
SDValue LHS = Sel.getOperand(0);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return Op;
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
// The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
}
return Op;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
case ISD::SUBC:
Opc = AArch64ISD::SUBS;
break;
case ISD::ADDE:
Opc = AArch64ISD::ADCS;
ExtraOp = true;
break;
case ISD::SUBE:
Opc = AArch64ISD::SBCS;
ExtraOp = true;
break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
if (Locality) {
// The front-end should have filtered out the out-of-range values
assert(Locality <= 3 && "Prefetch locality out-of-range");
// The locality degree is the inverse of the cache level: the PRFM encoding
// starts at 0 for L1, so flip the number.
Locality = 3 - Locality;
}
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
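// For example, a read prefetch of data with maximal locality (IsWrite=0,
// Locality=3, IsData=1) encodes as PrfOp 0, i.e. PLDL1KEEP.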
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
// Expand cases where the input is a vector bigger than NEON.
if (useSVEForFixedLengthVectorVT(SrcVT))
return SDValue();
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (InVT.getVectorElementType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
// TODO: Support lowering of NEON and SVE conversions.
if (SrcVT.isVector())
return SDValue();
// TODO: Saturate to SatWidth explicitly.
if (SatWidth != DstWidth)
return SDValue();
// In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
Op.getOperand(1));
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
(DstVT == MVT::i64 || DstVT == MVT::i32))
return Op;
// For all other cases, fall back on the expanded form.
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
// We can't directly extend an SVE predicate; extend it first.
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = getPromotedVTForPredicate(InVT);
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VTSize > InVTSize) {
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (Op.getValueType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless it's to the completely software-based
// fp128.
if (Op.getValueType() != MVT::f128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SelectionDAG &DAG) const {
// For iOS, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two S / D registers.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
: RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
static MVT getSVEContainerType(EVT ContentTy);
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT OpVT = Op.getValueType();
EVT ArgVT = Op.getOperand(0).getValueType();
if (useSVEForFixedLengthVectorVT(OpVT))
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
SDValue ExtResult =
DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
Op.getOperand(0));
return getSVESafeBitCast(OpVT, ExtResult, DAG);
}
return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
}
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
assert(ArgVT == MVT::i16);
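// Scalar i16 -> f16/bf16 bitcasts are lowered by widening to i32, moving the
// value into an f32 register, and extracting the h subregister.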
SDLoc DL(Op);
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect the ExtTy to be 128-bits total. If the OrigTy is less than
// 64-bits we need to insert a new extension so that it will be 64-bits.
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
// The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3,
// so that the shift + and get folded into a bitfield extract.
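// For example, RMode 0b00 (round to nearest) yields ((0 + 1) & 3) == 1, and
// RMode 0b11 (round toward zero) yields ((3 + 1) & 3) == 0, matching the
// FLT_ROUNDS encoding.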
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
// The rounding mode is in bits 23:22 of the FPCR.
// The llvm.set.rounding argument value to the rounding mode in FPCR mapping
// is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
// (((arg - 1) & 3) << 22).
//
// The argument of llvm.set.rounding must be within the segment [0, 3], so
// NearestTiesToAway (4) is not handled here. It is the responsibility of the
// code that generates llvm.set.rounding to ensure this condition.
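// For example, llvm.set.rounding(1) (to nearest) computes ((1 - 1) & 3) == 0,
// i.e. FPCR RMode 0b00.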
// Calculate new value of FPCR[23:22].
RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
DAG.getConstant(1, DL, MVT::i32));
RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
DAG.getConstant(0x3, DL, MVT::i32));
RMValue =
DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
// Get current value of FPCR.
SDValue Ops[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
SDValue FPCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
Chain = FPCR.getValue(1);
FPCR = FPCR.getValue(0);
// Put the new rounding mode into FPCR[23:22].
const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
DAG.getConstant(RMMask, DL, MVT::i64));
FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
SDValue Ops2[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
FPCR};
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
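// For example, (mul (sext v2i32), (sext v2i32)) producing v2i64 becomes a
// single SMULL node here.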
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = AArch64ISD::SMULL;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = AArch64ISD::UMULL;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = AArch64ISD::SMULL;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = AArch64ISD::UMULL;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = AArch64ISD::UMULL;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a S/UMULL instruction
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {
Op0 = skipExtensionForVectorMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering, to take advantage of no-stall back-to-back s/umul + s/umla.
// This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
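// Materialize an SVE PTRUE predicate of type VT using the given
// AArch64SVEPredPattern element-count pattern.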
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
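// Widen a narrower SVE predicate to the full <n x 16 x i1> svbool type. The
// extra lanes introduced by the widening must be zero: if the input is a
// zeroing compare or a ptrue they already are, so the reinterpret alone
// suffices; otherwise they are cleared with an explicit ptrue mask.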
static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT OutVT = Op.getValueType();
SDValue InOp = Op.getOperand(1);
EVT InVT = InOp.getValueType();
// Return the operand if the cast isn't changing type,
// i.e. <n x 16 x i1> -> <n x 16 x i1>
if (InVT == OutVT)
return InOp;
SDValue Reinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
// If the argument converted to an svbool is a ptrue or a comparison, the
// lanes introduced by the widening are zero by construction.
switch (InOp.getOpcode()) {
case AArch64ISD::SETCC_MERGE_ZERO:
return Reinterpret;
case ISD::INTRINSIC_WO_CHAIN:
if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
return Reinterpret;
}
// Otherwise, zero the newly introduced lanes.
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
SDValue MaskReinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
} else {
report_fatal_error("Unexpected type for AArch64 NEON intrinic");
}
}
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clasta_n:
return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_clastb_n:
return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_lasta:
return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_lastb:
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn2:
return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp1:
return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp2:
return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip1:
return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip2:
return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_splice:
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
if (Data.getValueType().isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Data, Op.getOperand(1));
}
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool:
return lowerConvertToSVBool(Op, DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintm:
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinta:
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_ucvtf:
return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_scvtf:
return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_abs:
return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_neg:
return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
case Intrinsic::aarch64_sve_rbit:
return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxth:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtw:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtb:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxth:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer to D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
report_fatal_error("Unexpected type for aarch64_neon_vsli");
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
case Intrinsic::aarch64_neon_srhadd:
case Intrinsic::aarch64_neon_urhadd:
case Intrinsic::aarch64_neon_shadd:
case Intrinsic::aarch64_neon_uhadd: {
bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
unsigned Opcode =
IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sabd:
case Intrinsic::aarch64_neon_uabd: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
: ISD::ABDS;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = AArch64ISD::UADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
case Intrinsic::aarch64_neon_udot:
case Intrinsic::aarch64_sve_sdot:
case Intrinsic::aarch64_sve_udot: {
unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
IntNo == Intrinsic::aarch64_sve_udot)
? AArch64ISD::UDOT
: AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
}
}
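// SVE gathers and scatters cannot use i8 or i16 vector indices directly, so
// request that such indices be extended to i32 first.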
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
if (VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) {
EltTy = MVT::i32;
return true;
}
return false;
}
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
VT.getVectorElementCount().getKnownMinValue() >= 4)
return true;
return false;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();
}
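// Map the (scaled, signed, extend) addressing properties of a masked gather
// to the corresponding GLD1* node. When no extension is needed the signed and
// unsigned forms share an opcode; the SXTW/UXTW variants are only required
// for 32-bit indices.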
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_SCALED_PRED},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unimplemented opcode");
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
}
}
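// Returns true if Index is the result of widening a 32-bit value, i.e. a
// sign_extend_inreg or an AND with a splatted 0xFFFFFFFF mask, in which case
// the extension can instead be folded into the gather/scatter addressing
// mode.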
bool getGatherScatterIndexIsExtended(SDValue Index) {
unsigned Opcode = Index.getOpcode();
if (Opcode == ISD::SIGN_EXTEND_INREG)
return true;
if (Opcode == ISD::AND) {
SDValue Splat = Index.getOperand(1);
if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
return false;
ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
return false;
return true;
}
return false;
}
// If the base pointer of a masked gather or scatter is null, we
// may be able to swap BasePtr & Index and use the vector + register
// or vector + immediate addressing mode, e.g.
// VECTOR + REGISTER:
// getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
// -> getelementptr %offset, <vscale x N x T> %indices
// VECTOR + IMMEDIATE:
// getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
// -> getelementptr #x, <vscale x N x T> %indices
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
unsigned &Opcode, bool IsGather,
SelectionDAG &DAG) {
if (!isNullConstant(BasePtr))
return;
// FIXME: This will not match for fixed vector type codegen as the nodes in
// question will have fixed<->scalable conversions around them. This should be
// moved to a DAG combine or complex pattern so that it executes after all of
// the fixed vector inserts and extracts have been removed. This deficiency
// will result in a sub-optimal addressing mode being used, i.e. an ADD not
// being folded into the scatter/gather.
ConstantSDNode *Offset = nullptr;
if (Index.getOpcode() == ISD::ADD)
if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
if (isa<ConstantSDNode>(SplatVal))
Offset = cast<ConstantSDNode>(SplatVal);
else {
BasePtr = SplatVal;
Index = Index->getOperand(0);
return;
}
}
unsigned NewOp =
IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
if (!Offset) {
std::swap(BasePtr, Index);
Opcode = NewOp;
return;
}
uint64_t OffsetVal = Offset->getZExtValue();
unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
// Index is out of range for the immediate addressing mode
BasePtr = ConstOffset;
Index = Index->getOperand(0);
return;
}
// Immediate is in range
Opcode = NewOp;
BasePtr = Index->getOperand(0);
Index = ConstOffset;
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
assert(MGT && "Can only custom lower gather load nodes");
bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
SDValue Index = MGT->getIndex();
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
ISD::LoadExtType ExtTy = MGT->getExtensionType();
ISD::MemIndexType IndexType = MGT->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool IdxNeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
EVT VT = PassThru.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
EVT MemVT = MGT->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
PassThru = SDValue();
if (VT.isFloatingPoint() && !IsFixedLength) {
// Handle FP data by using an integer gather and casting the result.
if (PassThru) {
EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
}
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/true, DAG);
if (ResNeedsSignExtend)
Opcode = getSignExtendedGatherOpcode(Opcode);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
Chain = Result.getValue(1);
if (IsFixedLength) {
Result = convertFromScalableVector(
DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
Result);
Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
if (PassThru)
Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
} else {
if (PassThru)
Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
if (VT.isFloatingPoint())
Result = getSVESafeBitCast(VT, Result, DAG);
}
return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
assert(MSC && "Can only custom lower scatter store nodes");
bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
SDValue Index = MSC->getIndex();
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
ISD::MemIndexType IndexType = MSC->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool NeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
EVT VT = StoreVal.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
SDVTList VTs = DAG.getVTList(MVT::Other);
EVT MemVT = MSC->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
StoreVal = DAG.getNode(
ISD::ANY_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/false, DAG);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
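// Masked loads are custom lowered so that a non-trivial passthru can be
// handled as a masked load of undef followed by a vector select; fixed-length
// vectors that use SVE take the fixed-length lowering path instead.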
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
SDValue Mask = LoadNode->getMask();
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
return Op;
SDValue Load = DAG.getMaskedLoad(
VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
LoadNode->getExtensionType());
SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
SDValue Value = ST->getValue();
// We first extend the promoted v4i16 to v8i16, truncate it to v8i8, and
// extract the word lane which represents the v4i8 subvector. This optimizes
// the store to:
//
// xtn v0.8b, v0.8h
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
{Undef, Undef, Undef, Undef});
SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for stores, vector or scalar, normal or truncating. This
// includes truncating stores from v4i16 to v4i8, volatile stores of i128,
// i64x8 stores, and various fixed-length and non-temporal vector stores.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
assert (StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
Align Alignment = StoreNode->getAlign();
if (Alignment < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
MemVT == MVT::v4i8) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256-bit inputs.
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u))) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(),
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
SDValue Lo =
DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
DAG.getConstant(1, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
} else if (MemVT == MVT::i64x8) {
SDValue Value = StoreNode->getValue();
assert(Value->getValueType(0) == MVT::i64x8);
SDValue Chain = StoreNode->getChain();
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
Value, DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
StoreNode->getOriginalAlign());
}
return Chain;
}
return SDValue();
}
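// Custom lowering for loads: i64x8 values are assembled from eight scalar
// i64 loads, and extending loads of v4i8 are expanded through an f32 load
// followed by a vector extend.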
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
SDValue Base = LoadNode->getBasePtr();
SDValue Chain = LoadNode->getChain();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
LoadNode->getPointerInfo(),
LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
return DAG.getMergeValues({Loaded, Chain}, DL);
}
// Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
ExtType = ISD::SIGN_EXTEND;
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
LoadNode->getExtensionType() == ISD::EXTLOAD)
ExtType = ISD::ZERO_EXTEND;
else
return SDValue();
SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
LoadNode->getBasePtr(), MachinePointerInfo());
SDValue Chain = Load.getValue(1);
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
DAG.getConstant(0, DL, MVT::i64));
if (VT == MVT::v4i32)
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
return DAG.getMergeValues({Ext, Chain}, DL);
}
// Generate SUBS and CSEL for integer abs.
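// For a scalar i32 this typically selects to something like:
//   cmp  w8, #0
//   cneg w0, w8, mi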
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FFLOOR:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
case ISD::FNEARBYINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
case ISD::FRINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
case ISD::FROUND:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
case ISD::FROUNDEVEN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
case ISD::FTRUNC:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FSQRT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
case ISD::FABS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/*OverrideNEON=*/true);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
/*OverrideNEON=*/true);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
/*OverrideNEON=*/true);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
case ISD::MGATHER:
return LowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return LowerMSCATTER(Op, DAG);
case ISD::VECREDUCE_SEQ_FADD:
return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT ExtraEltVT = ExtraVT.getVectorElementType();
if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
(ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
return SDValue();
return LowerToPredicatedOp(Op, DAG,
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
}
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
case ISD::AND:
return LowerToScalableOp(Op, DAG);
case ISD::SUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
case ISD::FMINIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
case ISD::FMINNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
case ISD::VSELECT:
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
/*OverrideNEON=*/true);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
}
}
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
case MVT::i1:
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64:
break;
}
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
return true;
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length SVE support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return CC_AArch64_DarwinPCS;
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
bool UseVarArgCC = false;
if (IsWin64)
UseVarArgCC = isVarArg;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
SmallVector<SDValue, 16> ArgValues;
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should work for fundamental types too.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
if (Ins[i].Flags.isSwiftAsync())
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1)
RC = &AArch64::PPRRegClass;
else if (RegVT.isScalableVector())
RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
: VA.getValVT().getSizeInBits()) / 8;
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
MVT PartLoad = VA.getValVT();
SDValue Ptr = ArgValue;
// Ensure we generate all loads for each tuple part, whilst updating the
// pointer after each load correctly using vscale.
while (NumParts > 0) {
ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
} else {
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
InVals.push_back(ArgValue);
}
}
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all float
// arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
RegParmTypes.push_back(MVT::i64);
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
}
// On Windows, InReg pointers must be returned, so record the pointer in a
// virtual register at the start of the function so it can be returned in the
// epilogue.
if (IsWin64) {
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isInReg()) {
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
break;
}
}
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = alignTo(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
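// Spill any unused varargs GPR (and, outside Win64, FPR) argument registers
// to the stack so that va_arg can find them. On Win64 only the GPRs are
// saved, since float varargs are passed in integer registers there.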
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SelectionDAG &DAG,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store = DAG.getStore(
Val.getValue(1), DL, Val, FIN,
IsWin64
? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
GPRIdx,
(i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(
Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
// Avoid copying a physreg twice since RegAllocFast is incompetent and only
// allows one use of a physreg per block.
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExtUpper:
Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
DAG.getConstant(32, DL, VA.getLocVT()));
LLVM_FALLTHROUGH;
case CCValAssign::AExt:
LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-Windows OS, we want
// to back up and restore X18 in such functions; we can't do a tail call
// from those functions.
if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
CalleeCC != CallingConv::Win64)
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
i != e; ++i) {
if (i->hasByValAttr())
return false;
// On Windows, "inreg" attributes signify non-aggregate indirect returns.
// In this case, it is necessary to save/restore X0 in the callee. Tail
// call opt interferes with this. So we disable tail call opt when the
// caller has an argument with "inreg" attribute.
// FIXME: Check whether the callee also has an "inreg" argument.
if (i->hasInRegAttr())
return false;
}
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!isVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (const CCValAssign &ArgLoc : ArgLocs)
if (!ArgLoc.isRegLoc())
return false;
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If any of the arguments is passed indirectly, it must be SVE, so the
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we explicitly conclude here that
// such a call cannot be a tail call.
if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
A.getValVT().isScalableVector()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
return false;
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each incoming stack-argument load that overlaps the
// clobbered frame object.
for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
UE = DAG.getEntryNode().getNode()->use_end();
U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
return Out.VT.isScalableVector();
});
bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
return In.VT.isScalableVector();
});
if (CalleeInSVE || CalleeOutSVE)
CallConv = CallingConv::AArch64_SVE_VectorCall;
}
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
if (!Outs[i].IsFixed && ArgVT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool UseVarArgCC = !Outs[i].IsFixed;
// On Windows, the fixed arguments in a vararg call are passed in GPRs
// too, so use the vararg CC to force them to integer registers.
if (IsCalleeWin64)
UseVarArgCC = true;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
} else {
// At this point, Outs[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeCallOperands to pass in ValVT and
// LocVT.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(),
CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
NumBytes = alignTo(NumBytes, 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
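// Keep a copy of SP around; addresses of outgoing stack arguments are
// computed relative to it.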
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
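// For a vararg musttail call, forward the registers that were set aside for
// variadic forwarding on to the callee.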
if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
RegsToPass.emplace_back(F.PReg, Val);
}
}
// Walk the register/memloc assignments, inserting copies/loads.
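// ExtraArgLocs counts Outs entries that share a single indirect (SVE tuple)
// location below, keeping the ArgLocs index in step with i.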
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::Trunc:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
}
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
// Ensure we generate all stores for each tuple part, whilst updating the
// pointer after each store correctly using vscale.
while (NumParts) {
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MPI = MachinePointerInfo(MPI.getAddrSpace());
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
Arg = SpillSlot;
break;
}
if (VA.isRegLoc()) {
if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
"unexpected use of 'returned'");
IsThisReturn = true;
}
if (RegsUsed.count(VA.getLocReg())) {
// If this register has already been used then we're trying to pack
// parts of an [N x i32] into an X-register. The extension type will
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
llvm::find_if(RegsToPass,
[=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
return ArgReg.Reg == VA.getLocReg();
});
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types too.
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect ||
VA.getLocInfo() == CCValAssign::Trunc)
OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
/*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
VA.getValVT() == MVT::i16)
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
unsigned OpFlags =
Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
if (OpFlags & AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call. However, in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when SP is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence.
// Use the CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
SDValue Flag;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to i8 by the producer of the
// value. This is strictly redundant on Darwin (which uses "zeroext
// i1"), but will be optimised out before ISel.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
}
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
case CCValAssign::ZExt:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
}
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
}
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Windows AArch64 ABIs require that for returning structs by value we copy
// the sret argument into X0 for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into X0.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
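// Any callee-saved registers that this function preserves by copying to
// virtual registers (rather than spilling in the prologue) must be live-out
// of the return.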
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AArch64::FPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
// (loadGOT sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
}
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, Ty,
getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
// (adr sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
}
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
// This also catches the large code model case for Darwin, and tiny code
// model with GOT relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
} else {
Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address (for Darwin, currently) and
/// return an SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
/// adrp x0, _var@TLVPPAGE
/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
/// ; the function pointer
/// blr x1 ; Uses descriptor address in x0
/// ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
// Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
/// is a function pointer to carry out the resolution.
///
/// The sequence is:
/// adrp x0, :tlsdesc:var
/// ldr x1, [x0, #:tlsdesc_lo12:var]
/// add x0, x0, #:tlsdesc_lo12:var
/// .tlsdesccall var
/// blr x1
/// (TPIDR_EL0 offset now in x0)
///
/// The above sequence must be produced unscheduled, to enable the linker to
/// optimize/relax this sequence.
/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
/// above sequence, and expanded really late in the compilation flow, to ensure
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain =
DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
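// Local-dynamic TLS code generation is off by default; treat such accesses
// as general-dynamic in that case.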
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64FunctionInfo *MFI =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLS);
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
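// On Windows, the TEB is reachable through register X18.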
SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x58 from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
Chain = TLSArray.getValue(1);
// Load the TLS index from the C runtime;
// This does the same as getAddr(), but without having a GlobalAddressSDNode.
// This also does the same as LOADgot, but using a generic i32 load,
// while LOADgot only loads i64.
SDValue TLSIndexHi =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
"_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
SDValue TLSIndex =
DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
Chain = TLSIndex.getValue(1);
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
Chain = TLS.getValue(1);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue TGAHi = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue TGALo = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// Add the offset from the start of the .tls section (section base).
SDValue Addr =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
return Addr;
}
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerWindowsGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
}
// Looks through \param Val to determine the bit that can be used to
// check the sign of the value. It returns the unextended value and
// the sign bit position.
std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
return {Val.getOperand(0),
cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
1};
if (Val.getOpcode() == ISD::SIGN_EXTEND)
return {Val.getOperand(0),
Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
return {Val, Val.getValueSizeInBits() - 1};
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
// The actual operation with overflow check.
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETNE) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
return BR1;
}
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
uint64_t EltMask;
SDValue VecVal1, VecVal2;
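// setVecVal moves the scalar inputs into vector registers (by inserting them
// into an undef vector) or, for vector types, bitcasts them to the integer
// vector type chosen below.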
auto setVecVal = [&] (int Idx) {
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In1);
VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
};
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
setVecVal(AArch64::ssub);
} else if (VT == MVT::f64 || VT == MVT::v2f64) {
VecVT = MVT::v2i64;
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
setVecVal(AArch64::dsub);
} else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
EltMask = 0x8000ULL;
setVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
if (VT == MVT::f64 || VT == MVT::v2f64) {
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
}
SDValue Sel =
DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
if (VT == MVT::f16)
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
else if (VT == MVT::f64)
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
else
return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
} else if (VT == MVT::i128) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
Val = DAG.getBitcast(VT8Bit, Val);
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
unsigned EltSize = 8;
unsigned NumElts = VT.is64BitVector() ? 8 : 16;
while (EltSize != VT.getScalarSizeInBits()) {
EltSize *= 2;
NumElts /= 2;
MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
Val = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
}
return Val;
}
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
true);
SDLoc DL(Op);
SDValue REVB;
MVT VST;
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Invalid type for bitreverse!");
case MVT::v2i32: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v4i32: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v1i64: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
case MVT::v2i64: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
}
return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Chain;
if (IsStrict)
Chain = Op.getOperand(0);
SDValue LHS = Op.getOperand(OpNo + 0);
SDValue RHS = Op.getOperand(OpNo + 1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
SDValue TVal = DAG.getConstant(1, dl, VT);
SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
SDValue Cmp;
if (IsStrict)
Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
else
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. As is in
// this case, we emit the first CSEL and then emit a second using the output
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Also handle f16, for which we need to do a f32 comparison.
if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
// Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
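// For instance (illustrative), in the i32 case this reads:
//   (select_cc setgt, x, -1, 1, -1)  -->  (or (sra x, 31), 1)
// i.e. "x >= 0 ? 1 : -1" becomes a single ASR plus an ORR with 1.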
if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
CTVal->isOne() && CFVal->isAllOnesValue() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
// order to form a CSINV or CSINC out of them.
if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
const int64_t FalseVal = CFVal->getSExtValue();
bool Swap = false;
// If both TVal and FVal are constants, see if FVal is the
// inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
} else if (FalseVal > std::numeric_limits<int64_t>::min() &&
TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
// arithmetic for the check whether we can use CSINC. This ensures that
// the addition in the check will wrap around properly in case there is
// an overflow (which would not be the case if we do the check with
// 64-bit arithmetic).
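// For example (illustrative): TVal = INT32_MIN, FVal = INT32_MAX. As
// uint32_t, 0x7fffffff + 1 wraps to 0x80000000 and the CSINC check
// succeeds, whereas the sign-extended 64-bit values -2^31 and 2^31 - 1
// would not satisfy TrueVal == FalseVal + 1.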
const uint32_t TrueVal32 = CTVal->getZExtValue();
const uint32_t FalseVal32 = CFVal->getZExtValue();
if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal32 > FalseVal32) {
Swap = true;
}
}
// 64-bit check whether we can use CSINC.
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal > FalseVal) {
Swap = true;
}
}
// Swap TVal and FVal if necessary.
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
// Drop FVal since we can get its value by simply inverting/negating
// TVal.
FVal = TVal;
}
}
// Avoid materializing a constant when possible by reusing a known value in
// a register. However, don't perform this optimization if the known value
// is one, zero or negative one in the case of a CSEL. We can always
// materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
!RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
TVal = LHS;
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
Opcode = AArch64ISD::CSINV;
TVal = LHS;
FVal = DAG.getConstant(0, dl, FVal.getValueType());
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (DAG.getTarget().Options.UnsafeFPMath) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
if (RHSVal && RHSVal->isZero()) {
ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
TVal = LHS;
else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
CFVal && CFVal->isZero() &&
FVal.getValueType() == LHS.getValueType())
FVal = LHS;
}
}
// Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
// Otherwise, return the output of the first CSEL.
return CS1;
}
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// Lower it the same way as we would lower a SELECT_CC node.
ISD::CondCode CC;
SDValue LHS, RHS;
if (CCVal.getOpcode() == ISD::SETCC) {
LHS = CCVal.getOperand(0);
RHS = CCVal.getOperand(1);
CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
} else {
LHS = CCVal;
RHS = DAG.getConstant(0, DL, CCVal.getValueType());
CC = ISD::SETNE;
}
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are emitted as PC-relative offsets. No additional
// tweaking is necessary here; just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are emitted as PC-relative offsets. No additional
// tweaking is necessary here; just get the address of the jump table.
SDLoc DL(Op);
SDValue JT = Op.getOperand(1);
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
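// For reference, a sketch of that layout (LP64 offsets; ILP32 in
// parentheses), matching the stores emitted below:
//   struct va_list {
//     void *__stack;    // offset 0
//     void *__gr_top;   // offset 8  (4)
//     void *__vr_top;   // offset 16 (8)
//     int   __gr_offs;  // offset 24 (12)
//     int   __vr_offs;  // offset 28 (16)
//   };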
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), Align(PtrSize)));
// void *__gr_top at offset 8 (4 on ILP32)
Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// void *__vr_top at offset 16 (8 on ILP32)
Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// int __gr_offs at offset 24 (12 on ILP32)
Offset += PtrSize;
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
// int __vr_offs at offset 28 (16 on ILP32)
Offset += 4;
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
else
return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes), while Darwin has a
// single pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
(Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
? PtrSize
: Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(VaListSize, DL, MVT::i32),
Align(PtrSize), false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"automatic va_arg instruction only works on Darwin");
const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
SDValue VAList =
DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
if (VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
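// For example (summarising the code below): an i32 vaarg still advances
// the list by the full slot size (8 bytes on LP64), and an f32 vaarg is
// loaded as an f64 and then rounded back down with FP_ROUND.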
if (VT.isInteger() && !VT.isVector())
ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
NeedFPTrunc = true;
}
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
if (NeedFPTrunc) {
// Load the value as an f64.
SDValue WideFP =
DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
if (Subtarget->isTargetILP32())
FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
DAG.getValueType(VT));
return FrameAddr;
}
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT VT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
int FI = MFI.CreateFixedObject(4, 0, false);
return DAG.getFrameIndex(FI, VT);
}
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
if (!Subtarget->isXRegisterReserved(DwarfRegNum))
Reg = 0;
}
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
ReturnAddress = DAG.getLoad(
VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
// The XPACLRI instruction assembles to a hint-space instruction before
// Armv8.3-A, so it can be used safely on any pre-Armv8.3-A architecture.
// On Armv8.3-A and onwards XPACI is available, so use that instead.
SDNode *St;
if (Subtarget->hasPAuth()) {
St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
} else {
// XPACLRI operates on LR therefore we must move the operand accordingly.
SDValue Chain =
DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
return SDValue(St, 0);
}
/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
SelectionDAG &DAG) const {
SDValue Lo, Hi;
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool OptForSize) const {
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
// and for the 16-bit case when the target has full fp16 support.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
// generate that fmov.
// If we cannot materialize the value in the immediate field of an fmov,
// check if it can be encoded as the immediate operand of a logical
// instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit this to at most 2 instructions.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
return IsLegal;
}
//===----------------------------------------------------------------------===//
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
if (ST->hasNEON() &&
(VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
VT == MVT::f32 || VT == MVT::v1f32 ||
VT == MVT::v2f32 || VT == MVT::v4f32)) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
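// (As a quick check: k refinement steps give roughly 8 * 2^k accurate
// bits, so 2 steps -> 32 > 23 and 3 steps -> 64 > 52.)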
ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
return Op;
}
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
bool &UseOneConst,
bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
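// The loop below maps onto that formula roughly as follows (a sketch of
// the mapping, not a change in behaviour):
//   Step     = E * E
//   Step     = FRSQRTS(X, Step)   ==  0.5 * (3 - X * E^2)
//   Estimate = E * Step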
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
// compiler, not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
// x - An FP/SIMD register of some size in the range v0-v15
// I - Constant that can be used with an ADD instruction
// J - Constant that can be used with a SUB instruction
// K - Constant that can be used with a 32-bit logical instruction
// L - Constant that can be used with a 64-bit logical instruction
// M - Constant that can be used as a 32-bit MOV immediate
// N - Constant that can be used as a 64-bit MOV immediate
// Q - A memory reference with base register and no offset
// S - A symbolic address
// Y - Floating point constant zero
// Z - Integer constant zero
//
// Note that general register operands will be output using their 64-bit x
// register name, whatever the size of the variable, unless the asm operand
// is prefixed by the %w modifier. Floating-point and SIMD register operands
// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
// %q modifier.
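// An illustrative example (operand names are hypothetical):
//   asm("mov %w0, %w1" : "=r"(Res) : "r"(Val));
// prints the 32-bit "w" register names, whereas plain %0/%1 would print
// the 64-bit "x" names regardless of the C type of Res and Val.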
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasFPARMv8())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
enum PredicateConstraint {
Upl,
Upa,
Invalid
};
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
PredicateConstraint P = PredicateConstraint::Invalid;
if (Constraint == "Upa")
P = PredicateConstraint::Upa;
if (Constraint == "Upl")
P = PredicateConstraint::Upl;
return P;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'x':
case 'w':
case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'x':
case 'w':
case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
case 'U':
if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
weight = CW_Register;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w': {
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector()) {
if (VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, &AArch64::ZPRRegClass);
return std::make_pair(0U, nullptr);
}
uint64_t VTSize = VT.getFixedSizeInBits();
if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
case 'y':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
break;
}
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
int RegNo;
bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR64RegClass;
} else {
Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR128RegClass;
}
}
}
}
if (Res.second && !Subtarget->hasFPARMv8() &&
!AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
!AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
return std::make_pair(0U, nullptr);
return Res;
}
EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
llvm::Type *Ty,
bool AllowUnknown) const {
if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
return EVT(MVT::i64x8);
return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default:
break;
// This set of constraints deals with valid constants for various instructions.
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
Result = DAG.getRegister(AArch64::XZR, MVT::i64);
else
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(Op)) {
Result =
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
} else
return;
break;
}
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
// Grab the value and do some validation.
uint64_t CVal = C->getZExtValue();
switch (ConstraintLetter) {
// The I constraint applies only to simple ADD or SUB immediate operands:
// i.e. 0 to 4095 with optional shift by 12
// The J constraint applies only to ADD or SUB immediates that would be
// valid when negated, i.e. if [an add pattern] were to be output as a SUB
// instruction [or vice versa], in other words -1 to -4095 with optional
// left shift by 12.
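// For example (illustrative): 0, 4095 and (4095 << 12) all satisfy 'I',
// while -1 through -4095 (optionally shifted left by 12) satisfy 'J'.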
case 'I':
if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
break;
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
CVal = C->getSExtValue();
break;
}
return;
}
// The K and L constraints apply *only* to logical immediates, including
// what used to be the MOVI alias for ORR (though the MOVI alias has now
// been removed and MOV should be used). So these constraints have to
// distinguish between bit patterns that are valid 32-bit or 64-bit
// "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
// not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
// versa.
case 'K':
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
return;
case 'L':
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
return;
// The M and N constraints are a superset of K and L respectively, for use
// with the MOV (immediate) alias. As well as the logical immediates they
// also match 32 or 64-bit immediates that can be loaded either using a
// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
// (M) or 64-bit 0x1234000000000000 (N) etc.
// As a note, some of this code is liberally stolen from the asm parser.
case 'M': {
if (!isUInt<32>(CVal))
return;
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
if ((CVal & 0xFFFF) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
uint64_t NCVal = ~(uint32_t)CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
return;
}
case 'N': {
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
if ((CVal & 0xFFFFULL) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
if ((CVal & 0xFFFF00000000ULL) == CVal)
break;
if ((CVal & 0xFFFF000000000000ULL) == CVal)
break;
uint64_t NCVal = ~CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF00000000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
break;
return;
}
default:
return;
}
// All assembler immediates are 64-bit integers.
Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// AArch64 Advanced SIMD Support
//===----------------------------------------------------------------------===//
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
EVT VT = V64Reg.getValueType();
unsigned NarrowSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
V64Reg, DAG.getConstant(0, DL, MVT::i64));
}
/// getExtFactor - Determine the adjustment factor for the position when
/// generating an "extract from vector registers" instruction.
static unsigned getExtFactor(SDValue &V) {
EVT EltType = V.getValueType().getVectorElementType();
return EltType.getSizeInBits() / 8;
}
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
EVT VT = V128Reg.getValueType();
unsigned WideSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
SDLoc DL(V128Reg);
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
"Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
int WindowBase;
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1))) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
"various elements of other vectors, provided their "
"indices are constant\n");
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
if (Sources.size() > 2) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: currently only do something sane when at "
"most two source vectors are involved\n");
return SDValue();
}
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy;
}
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
uint64_t VTSize = VT.getFixedSizeInBits();
NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVTSize < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVTSize != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
}
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
"for SVE vectors.");
return SDValue();
}
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final sanity check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
}
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
return V;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are the same.
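// For example, on v8i8 the single-source mask <3, 4, 5, 6, 7, 0, 1, 2> is
// accepted with Imm = 3: the indices increase by one and wrap around at
// NumElts, which is exactly what EXT of a vector with itself produces.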
static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0)
continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
/// Check if a vector shuffle corresponds to a DUP instruction with a larger
/// element width than the vector lane type. If that is the case the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp.
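///
/// For example, on v8i16 with BlockSize == 32 the mask <2, 3, 2, 3, 2, 3, 2, 3>
/// duplicates the 32-bit block formed by lanes 2 and 3, so DupLaneOp is 1.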
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
unsigned &DupLaneOp) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for wide DUP are: 16, 32, 64");
if (BlockSize <= VT.getScalarSizeInBits())
return false;
if (BlockSize % VT.getScalarSizeInBits() != 0)
return false;
if (VT.getSizeInBits() % BlockSize != 0)
return false;
size_t SingleVecNumElements = VT.getVectorNumElements();
size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
size_t NumBlocks = VT.getSizeInBits() / BlockSize;
// We are looking for masks like
// [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
// might be replaced by 'undefined'. BlockElts will eventually contain
// lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
// for the above examples)
SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
for (size_t I = 0; I < NumEltsPerBlock; I++) {
int Elt = M[BlockIndex * NumEltsPerBlock + I];
if (Elt < 0)
continue;
// For now we don't support shuffles that use the second operand
if ((unsigned)Elt >= SingleVecNumElements)
return false;
if (BlockElts[I] < 0)
BlockElts[I] = Elt;
else if (BlockElts[I] != Elt)
return false;
}
// We found a candidate block (possibly with some undefs). It must be a
// sequence of consecutive integers starting with a value divisible by
// NumEltsPerBlock with some values possibly replaced by undef-s.
// Find first non-undef element
auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
assert(FirstRealEltIter != BlockElts.end() &&
"Shuffle with all-undefs must have been caught by previous cases, "
"e.g. isSplat()");
if (FirstRealEltIter == BlockElts.end()) {
DupLaneOp = 0;
return true;
}
// Index of FirstRealElt in BlockElts
size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
if ((unsigned)*FirstRealEltIter < FirstRealIndex)
return false;
// BlockElts[0] must have the following value if it isn't undef:
size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
// Check the first element
if (Elt0 % NumEltsPerBlock != 0)
return false;
// Check that the sequence indeed consists of consecutive integers (modulo
// undefs)
for (size_t I = 0; I < NumEltsPerBlock; I++)
if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
return false;
DupLaneOp = Elt0 / NumEltsPerBlock;
return true;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
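// For example, on v4i32 the mask <2, 3, 4, 5> selects a contiguous window of
// the concatenated sources, so it is accepted with Imm = 2 and ReverseEXT
// left false.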
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit from APInt to handle overflow when calculating the expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
// The following shuffle indices must be the successive elements after the
// first real element.
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
[&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
if (FirstWrongElt != M.end())
return false;
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
Imm = ExpectedElt.getZExtValue();
// There are two different cases that require reversing the input vectors.
// For example, for vector <4 x i32> we have the following cases,
// Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
// Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
// For both cases, we finally use mask <5, 6, 7, 0>, which requires
// reversing the two input vectors.
if (Imm < NumElts)
ReverseEXT = true;
else
Imm -= NumElts;
return true;
}
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
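///
/// For example, on v8i16 with BlockSize == 32 the mask <1, 0, 3, 2, 5, 4, 7, 6>
/// swaps the two 16-bit lanes inside every 32-bit block, matching REV32.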
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
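// For example, on v4i32 the mask <0, 4, 1, 5> interleaves the low halves of
// the two sources (WhichResult == 0, i.e. ZIP1), while <2, 6, 3, 7>
// interleaves the high halves (WhichResult == 1, i.e. ZIP2).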
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}
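// For example, on v4i32 the mask <0, 2, 4, 6> gathers the even lanes of the
// concatenated sources (WhichResult == 0, i.e. UZP1) and <1, 3, 5, 7> gathers
// the odd lanes (WhichResult == 1, i.e. UZP2).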
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
return true;
}
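// For example, on v4i32 the mask <0, 4, 2, 6> transposes the even lanes of
// the two sources (WhichResult == 0, i.e. TRN1) and <1, 5, 3, 7> transposes
// the odd lanes (WhichResult == 1, i.e. TRN2).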
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}
/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
Idx += 1;
}
return true;
}
/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned Half = VT.getVectorNumElements() / 2;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
int MIdx = M[i + j * Half];
if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
Idx += 2;
}
}
return true;
}
/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
return false;
}
return true;
}
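// For example, with 4 input elements the mask <0, 6, 2, 3> keeps every lane
// of the LHS except lane 1, which comes from lane 2 of the RHS; it is
// accepted with DstIsLeft == true and Anomaly == 1, and the caller lowers it
// to a single INSERT_VECTOR_ELT of the extracted source lane.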
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
bool &DstIsLeft, int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
int NumLHSMatch = 0, NumRHSMatch = 0;
int LastLHSMismatch = -1, LastRHSMismatch = -1;
for (int i = 0; i < NumInputElements; ++i) {
if (M[i] == -1) {
++NumLHSMatch;
++NumRHSMatch;
continue;
}
if (M[i] == i)
++NumLHSMatch;
else
LastLHSMismatch = i;
if (M[i] == i + NumInputElements)
++NumRHSMatch;
else
LastRHSMismatch = i;
}
if (NumLHSMatch == NumInputElements - 1) {
DstIsLeft = true;
Anomaly = LastLHSMismatch;
return true;
} else if (NumRHSMatch == NumInputElements - 1) {
DstIsLeft = false;
Anomaly = LastRHSMismatch;
return true;
}
return false;
}
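// For example, on v4i32 the mask <0, 1, 4, 5> takes the low half of each
// 128-bit source; isConcatMask accepts it with SplitLHS == true, and
// tryFormConcatFromShuffle then emits a single CONCAT_VECTORS of the two
// extracted halves.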
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
if (VT.getSizeInBits() != 128)
return false;
unsigned NumElts = VT.getVectorNumElements();
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I)
return false;
}
int Offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + SplitLHS * Offset)
return false;
}
return true;
}
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
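///
/// Each PFEntry packs the cost in bits 31-30, the operation in bits 29-26 and
/// two 13-bit operand IDs. An operand ID is the base-9 encoding of a
/// four-lane mask (digits 0-7 select a lane of the concatenated inputs, 8
/// means undef), which is why OP_COPY compares against (1*9+2)*9+3 for the
/// LHS and ((4*9+5)*9+6)*9+7 for the RHS below.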
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR // VTRN, right result
};
if (OpNum == OP_COPY) {
if (LHSID == (1 * 9 + 2) * 9 + 3)
return LHS;
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3: {
EVT EltTy = VT.getVectorElementType();
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
else if (EltTy == MVT::i64 || EltTy == MVT::f64)
Opcode = AArch64ISD::DUPLANE64;
else
llvm_unreachable("Invalid vector element type?");
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VUZPR:
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPL:
return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPR:
return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNL:
return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNR:
return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
}
}
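// Lower a shuffle through a TBL table lookup by expanding every mask element
// into per-byte indices. For example, a v8i16 mask element of 3 (with
// BytesPerElt == 2) contributes byte indices 6 and 7 to TBLMask.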
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the TBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
if (V2.getNode()->isUndef()) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
if (IndexLen == 8) {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// FIXME: We cannot, for the moment, emit a TBL2 instruction because we
// cannot currently represent the register constraints on the input
// table registers.
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
// DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
// IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
V2Cst, DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
}
}
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
if (EltType == MVT::i64 || EltType == MVT::f64)
return AArch64ISD::DUPLANE64;
llvm_unreachable("Invalid vector element type?");
}
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
unsigned Opcode, SelectionDAG &DAG) {
// Try to eliminate a bitcasted extract subvector before a DUPLANE.
auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
// Match: dup (bitcast (extract_subv X, C)), LaneC
if (BitCast.getOpcode() != ISD::BITCAST ||
BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
// The extract index must align in the destination type. That may not
// happen if the bitcast is from narrow to wide type.
SDValue Extract = BitCast.getOperand(0);
unsigned ExtIdx = Extract.getConstantOperandVal(1);
unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
// Determine the casted vector type of the wide vector input.
// dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
// Examples:
// dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
// dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
unsigned SrcVecNumElts =
Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
SrcVecNumElts);
return true;
};
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
Lane += V.getConstantOperandVal(1);
V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);
}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
"Unexpected VECTOR_SHUFFLE mask size!");
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via "just" vdup, if possible.
if (Lane == -1)
Lane = 0;
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
// constant. If so, we can just reference the lane's definition directly.
if (V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
return constructDup(V1, Lane, dl, VT, Opcode, DAG);
}
// Check if the mask matches a DUP for a wider element
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
: LaneSize == 32 ? AArch64ISD::DUPLANE32
: AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
V1 = DAG.getBitcast(NewVecTy, V1);
// Construct the DUP instruction
V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
// Cast back to the original type
return DAG.getBitcast(VT, V1);
}
}
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask)) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
}
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
if (ReverseEXT)
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
return Concat;
bool DstIsLeft;
int Anomaly;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);
}
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
}
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT ElemVT = VT.getScalarType();
SDValue SplatVal = Op.getOperand(0);
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
// Extend input splat value where needed to fit into a GPR (32b or 64b only).
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
// TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
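// Sign-extending the i1 gives either 0 or all-ones, so whilelo(0, 0) yields
// an all-false predicate and whilelo(0, ~0) an all-true one, which is
// exactly the splat we need.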
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
DAG.getConstant(0, dl, MVT::i64), SplatVal);
}
case MVT::i8:
case MVT::i16:
case MVT::i32:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
break;
case MVT::i64:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
break;
case MVT::f16:
case MVT::bf16:
case MVT::f32:
case MVT::f64:
// Fine as is
break;
default:
report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
}
return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isScalableVector())
return SDValue();
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
// The DUPQ operation is independent of element type so normalise to i64s.
SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
SDNode *DUPQ =
DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
}
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
// index * 2))
SDValue One = DAG.getConstant(1, DL, MVT::i64);
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// create the vector 0,1,0,1,...
SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// create the vector idx64,idx64+1,idx64,idx64+1,...
SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
// create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
for (unsigned i = 0; i < NumSplats; ++i) {
CnstBits <<= SplatBitSize;
UndefBits <<= SplatBitSize;
CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
}
return true;
}
return false;
}
// Try 64-bit splatted SIMD immediate.
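// Only constants whose 64-bit pattern repeats across the vector and whose
// bytes are each 0x00 or 0xff (MOVI type 10) qualify here, e.g. a splat of
// 0x00ff00ff00ff00ff.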
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 16-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate with shifted ones.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
SelectionDAG &DAG, const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 8-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try FP splatted SIMD immediate.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
bool isWide = (VT.getSizeInBits() == 128);
MVT MovTy;
bool isAdvSIMDModImm = false;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
}
else if (isWide &&
(isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
uint64_t &ConstVal) {
BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
if (!Bvec)
return false;
ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
if (!FirstElt)
return false;
EVT VT = Bvec->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 1; i < NumElts; ++i)
if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
return false;
ConstVal = FirstElt->getZExtValue();
return true;
}
static unsigned getIntrinsicID(const SDNode *N) {
unsigned Opcode = N->getOpcode();
switch (Opcode) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
}
}
}
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTOR with constant element C1, C2 is a constant, and:
// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
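// For example, on v4i32 with C2 == 8 and C1 == 0x000000ff, the pattern
// (or (and X, 0xff), (shl Y, 8)) keeps the low 8 bits of each lane of X and
// inserts Y shifted left by 8, which is exactly what SLI X, Y, #8 does.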
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDLoc DL(N);
SDValue And;
SDValue Shift;
SDValue FirstOp = N->getOperand(0);
unsigned FirstOpc = FirstOp.getOpcode();
SDValue SecondOp = N->getOperand(1);
unsigned SecondOpc = SecondOp.getOpcode();
// Is one of the operands an AND or a BICi? The AND may have been optimised to
// a BICi in order to use an immediate instead of a register.
// Is the other operand a shl or lshr? This will have been turned into:
// AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
(SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
And = FirstOp;
Shift = SecondOp;
} else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
(FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
And = SecondOp;
Shift = FirstOp;
} else
return SDValue();
bool IsAnd = And.getOpcode() == ISD::AND;
bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
uint64_t C1;
if (IsAnd) {
// Is the and mask vector all constant?
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
} else {
// Reconstruct the corresponding AND immediate from the two BICi immediates.
ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
assert(C1nodeImm && C1nodeShift);
C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
}
// Is C1 == ~(Ones(ElemSizeInBits) << C2) or
// C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
APInt C1AsAPInt(ElemSizeInBits, C1);
APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
: APInt::getLowBitsSet(ElemSizeInBits, C2);
if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));
++NumShiftInserts;
return ResultSLI;
}
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {
// OR commutes, so try swapping the operands.
LHS = Op.getOperand(1);
BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
}
if (!BVN)
return Op;
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)))
return NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)))
return NewOp;
}
// We can always fall back to a non-immediate OR.
return Op;
}
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
static SDValue NormalizeBuildVector(SDValue Op,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
// For integer vectors, type legalization would have promoted the
// operands already. Otherwise, if Op is a floating-point splat
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
assert(Lane.getValueType() == MVT::i32 &&
"Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
return DAG.getBuildVector(VT, dl, Ops);
}
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
// Certain vector constants, used to express things like logical NOT and
// arithmetic NEG, are passed through unmodified. This allows special
// patterns for these operations to match, which will lower these constants
// to whatever is proven necessary.
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (BVN->isConstant())
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
if (Val.isNullValue() || Val.isAllOnesValue())
return Op;
}
}
if (SDValue V = ConstantBuildVector(Op, DAG))
return V;
// Scan through the operands to find some interesting properties we can
// exploit:
// 1) If only one value is used, we can use a DUP, or
// 2) if only the low element is not undef, we can just insert that, or
// 3) if only one constant value is used (w/ some non-constant lanes),
// we can splat the constant value into the whole vector then fill
// in the non-constant lanes.
// 4) FIXME: If different constant values are used, but we can intelligently
// select the values we'll be overwriting for the non-constant
// lanes such that we can directly materialize the vector
// some other way (MOVI, e.g.), we can be sneaky.
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool usesOnlyOneConstantValue = true;
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
unsigned NumDifferentLanes = 0;
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
if (V.isUndef()) {
++NumUndefLanes;
continue;
}
if (i > 0)
isOnlyLowElement = false;
if (!isIntOrFPConstant(V))
isConstant = false;
if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
else if (ConstantValue != V)
usesOnlyOneConstantValue = false;
}
if (!Value.getNode())
Value = V;
else if (V != Value) {
usesOnlyOneValue = false;
++NumDifferentLanes;
}
}
if (!Value.getNode()) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
}
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
}
if (AllLanesExtractElt) {
SDNode *Vector = nullptr;
bool Even = false;
bool Odd = false;
// Check whether the extract elements match the Even pattern <0,2,4,...> or
// the Odd pattern <1,3,5,...>.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
if (!isa<ConstantSDNode>(N->getOperand(1)))
break;
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
if (!Vector) {
Vector = N0.getNode();
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
// BUILD_VECTOR.
if (VT.getVectorElementType() !=
N0.getValueType().getVectorElementType())
break;
} else if (Vector != N0.getNode()) {
Odd = false;
Even = false;
break;
}
// Extracted values are either at Even indices <0,2,4,...> or at Odd
// indices <1,3,5,...>.
uint64_t Val = N->getConstantOperandVal(1);
if (Val == 2 * i) {
Even = true;
continue;
}
if (Val - 1 == 2 * i) {
Odd = true;
continue;
}
// Something does not match: abort.
Odd = false;
Even = false;
break;
}
if (Even || Odd) {
SDValue LHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(0, dl, MVT::i64));
SDValue RHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(NumElts, dl, MVT::i64));
if (Even && !Odd)
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
RHS);
if (Odd && !Even)
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
RHS);
}
}
// Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Value.getValueType() != VT) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
}
// This is actually a DUPLANExx operation, which keeps everything vectory.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
if (Value.getValueSizeInBits() == 64) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
"widening it\n");
Value = WidenVector(Value, DAG);
}
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
Val.dump(););
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
}
// If we need to insert a small number of different non-constant elements and
// the vector width is sufficiently large, prefer using DUP with the common
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
// skip the constant lane handling below.
bool PreferDUPAndInsert =
!isConstant && NumDifferentLanes >= 1 &&
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
NumDifferentLanes >= NumConstantLanes;
// If only one constant value was used, across more than one lane, start by
// splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
if (!Val) {
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
}
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
}
return Val;
}
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
"expansion\n");
return SDValue();
}
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
return NewVector;
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
"of INSERT_VECTOR_ELT\n");
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
// Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register, and we're forced to emit an
// INSERT_SUBREG that we can't fold anywhere.
//
// We also allow types like i8 and i16 which are illegal scalar but legal
// vector element types. After type-legalization the inserted value is
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
LLVM_DEBUG(if (i < NumElts) dbgs()
<< "Creating nodes for the other vector elements:\n";);
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
"better alternative\n");
return SDValue();
}
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue ExtendedVector =
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
SDValue ExtendedValue =
DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
VectorVT.getScalarType().getSizeInBits() < 32
? MVT::i32
: VectorVT.getScalarType());
ExtendedVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
ExtendedValue, Op.getOperand(2));
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
// to a V128 type and performing the insertion on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
Op.getOperand(1), Op.getOperand(2));
// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);
}
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
// We can't directly extract from an SVE predicate; extend it first.
// (This isn't the only possible lowering, but it's straightforward.)
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue Extend =
DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
Extend, Op.getOperand(1));
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
// to a V128 type and performing the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
ExtrTy = MVT::i32;
// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
if (InVT.isScalableVector()) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isInteger())
return SDValue();
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
// Extend elements of smaller vector...
EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
} else if (Idx == InVT.getVectorMinNumElements()) {
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
}
return SDValue();
}
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
assert(VT.isScalableVector() && "Expected a scalable vector.");
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.
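// For example, an nxv16i8 division is unpacked into two nxv8i16 halves
// (which are widened again to nxv4i32 when they reach this lowering),
// divided, and the narrowed results are recombined with UZP1.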
EVT WidenedVT;
if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;
else
llvm_unreachable("Unexpected Custom DIV operation");
SDLoc dl(Op);
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
return false;
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (M[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = M[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
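// (For example, the identity mask <0,1,2,3> gives 0*729 + 1*81 + 2*9 + 3 = 102.)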
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return true;
}
bool DummyBool;
int DummyInt;
unsigned DummyUnsigned;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
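/// For example, with v8i16 elements (ElementBits = 16) a regular left shift
/// accepts shift amounts 0..15, while a long left shift also accepts 16.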
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift; or
/// 1 <= Value <= ElementBits/2 for a narrowing right shift.
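/// For example, with v8i16 elements (ElementBits = 16) a right shift accepts
/// shift amounts 1..16, while a narrowing right shift accepts only 1..8.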
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}
if (!VT.isVector() || VT.isScalableVector())
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
int64_t Cnt;
if (!Op.getOperand(1).getValueType().isVector())
return Op;
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
default:
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note that there is no shift right register
// instruction, but the shift left register instruction takes a signed
// value, where negative numbers specify a right shift.
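// For example, a variable "srl x, y" is emitted below as the ushl intrinsic
// applied to x and (0 - y).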
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// negate the shift amount
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
NegShift);
return NegShiftLeft;
}
return SDValue();
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
const SDLoc &dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Fcmeq;
if (IsZero)
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the MI implementation.
LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Cmeq;
if (IsZero)
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
case AArch64CC::LS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
case AArch64CC::LO:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
case AArch64CC::HS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
}
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions
// v8f16 support will be a little more complicated
if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
DAG.ReplaceAllUsesWith(Op, NewSetcc);
CmpVT = MVT::v4i32;
} else
return SDValue();
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
AArch64CC::CondCode CC1, CC2;
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
}
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Src);
}
default:
llvm_unreachable("Unhandled reduction");
}
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
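// Instead, negate the operand and emit a load-add: subtracting RHS is the
// same as adding (0 - RHS).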
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
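// Instead, invert the operand and emit a load-clear: ANDing with RHS is the
// same as clearing the bits that are set in ~RHS.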
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
// To match the actual intent better, we should read the output from X15 here
// again (instead of potentially spilling it to the stack), but rereading Size
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
// here.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
return Chain;
}
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported");
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Align =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
DL, VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
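/// For example, for aarch64_sve_st3 of nxv4i32 values the resulting memVT is
/// nxv12i32 (three times the element count of a single vector operand).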
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
return true;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
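/// For example, an aarch64_neon_ld2 returning two <4 x i32> vectors is given a
/// conservative memVT of v4i64 (256 bits in total).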
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOLoad;
if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
Info.flags = MachineMemOperand::MOStore;
if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
Info.flags |= MachineMemOperand::MONonTemporal;
return true;
}
default:
break;
}
return false;
}
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
return true;
// Don't reduce load width if it would prevent us from combining a shift into
// the offset.
MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
assert(Mem);
const SDValue &Base = Mem->getBasePtr();
if (Base.getOpcode() == ISD::ADD &&
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
// We have no reason to disallow reducing the load width, so allow it.
return true;
}
// Truncations from 64-bit GPR to 32-bit GPR are free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
uint64_t NumBits1 = VT1.getFixedSizeInBits();
uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
/// Check if it is profitable to hoist an instruction in then/else to if.
/// Not profitable if I and its user can form an FMA instruction
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
if (User &&
!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2)) {
return true;
}
if (Val.getOpcode() != ISD::LOAD)
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
// Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
for (const Use &U : Ext->uses()) {
// The extension is free if we can fold it with a left shift in an
// addressing mode or an arithmetic operation: add, sub, and cmp.
// Is there a shift?
const Instruction *Instr = cast<Instruction>(U.getUser());
// Is this a constant shift?
switch (Instr->getOpcode()) {
case Instruction::Shl:
if (!isa<ConstantInt>(Instr->getOperand(1)))
return false;
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo()-1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
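// For example, an i64 index type has a 64-bit store size, so
// ShiftAmt = log2(64) - 3 = 3.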
uint64_t ShiftAmt =
countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return false;
break;
}
case Instruction::Trunc:
// Check if this is a noop.
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
LLVM_FALLTHROUGH;
default:
return false;
}
// At this point we can use the bfm family, so this extension is free
// for that use.
}
return true;
}
/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
/// or upper half of the vector elements.
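/// For example, two shuffles that each take the upper half (elements 8..15) of
/// their respective <16 x i8> sources as an <8 x i8> result satisfy this.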
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
auto *FullTy = FullV->getType();
auto *HalfTy = HalfV->getType();
return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
auto *FullVT = cast<FixedVectorType>(FullV->getType());
auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
!match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
// half of the elements of the input vectors.
if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
!extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
return false;
// Check the mask extracts either the lower or upper half of vector
// elements.
int M1Start = -1;
int M2Start = -1;
int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
return false;
return true;
}
/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
/// of the vector elements.
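/// For example, a pair of sext (or zext) instructions from <8 x i8> to
/// <8 x i16> qualifies, since each doubles the element width.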
static bool areExtractExts(Value *Ext1, Value *Ext2) {
auto areExtDoubled = [](Instruction *Ext) {
return Ext->getType()->getScalarSizeInBits() ==
2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
};
if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
!match(Ext2, m_ZExtOrSExt(m_Value())) ||
!areExtDoubled(cast<Instruction>(Ext1)) ||
!areExtDoubled(cast<Instruction>(Ext2)))
return false;
return true;
}
/// Check if Op could be used with vmull_high_p64 intrinsic.
static bool isOperandOfVmullHighP64(Value *Op) {
Value *VectorOperand = nullptr;
ConstantInt *ElementIndex = nullptr;
return match(Op, m_ExtractElt(m_Value(VectorOperand),
m_ConstantInt(ElementIndex))) &&
ElementIndex->getValue() == 1 &&
isa<FixedVectorType>(VectorOperand->getType()) &&
cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
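/// For example, if the operands of an add are extends of the upper halves of
/// two vectors, sinking the extends (and their extracting shuffles) next to
/// the add allows them to be folded into a single widening instruction such as
/// uaddl2.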
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
if (!I->getType()->isVectorTy())
return false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_umull:
if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
return false;
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
return false;
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
default:
return false;
}
}
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
return false;
// If the exts' operands extract either the lower or upper elements, we
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
if (areExtractShuffleVectors(Ext1, Ext2)) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
return true;
}
case Instruction::Mul: {
bool IsProfitable = false;
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
Value *ShuffleOperand = Shuffle->getOperand(0);
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
if (!Insert)
continue;
Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
if (!OperandInstr)
continue;
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
if (!ElementConstant || ElementConstant->getZExtValue() != 0)
continue;
unsigned Opcode = OperandInstr->getOpcode();
if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
continue;
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
IsProfitable = true;
}
return IsProfitable;
}
default:
return false;
}
return false;
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
Align &RequiredAligment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
RequiredAligment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
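/// For example, a 512-bit vector type such as <16 x i32> is lowered using
/// (512 + 127) / 128 = 4 interleaved accesses.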
unsigned
AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
const DataLayout &DL) const {
return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
MachineMemOperand::Flags
AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
return MachineMemOperand::MONone;
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector cannot be the return type of the ldN intrinsics. We need
// to load integer vectors first and then convert them to pointer vectors.
Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
FVTy = FixedVectorType::get(FVTy->getElementType(),
FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
Function *LdNFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SVI = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
Type *EltTy = VecTy->getElementType();
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
Type *Tys[2] = {SubVecTy, PtrTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
Intrinsic::aarch64_neon_st3,
Intrinsic::aarch64_neon_st4};
Function *StNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: Filling undef gaps with arbitrary elements is OK, since
// those elements were being written anyway (with undefs).
// In the all-undef case we default to using elements from 0.
// Note: StartMask cannot be negative; that is checked in
// isReInterleaveMask.
Ops.push_back(Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
}
}
// If we're generating more than one store, compute the base address of
// subsequent stores as an offset from the previous one.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
}
return true;
}
// Lower an SVE structured load intrinsic returning a tuple type to target
// specific intrinsic taking the same input but returning a multi-result value
// of the split tuple type.
//
// E.g. Lowering an LD3:
//
// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
// <vscale x 4 x i1> %pred,
// <vscale x 4 x i32>* %addr)
//
// Output DAG:
//
// t0: ch = EntryToken
// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
// t4: i64,ch = CopyFromReg t0, Register:i64 %1
// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
//
// This is called pre-legalization to avoid widening/splitting issues with
// non-power-of-2 tuple types used for LD3, such as nxv12i32.
SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG,
const SDLoc &DL) const {
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
{Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
EVT SplitVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
VTs.push_back(MVT::Other); // Chain
SDVTList NodeTys = DAG.getVTList(VTs);
SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
SmallVector<SDValue, 4> PseudoLoadOps;
for (unsigned I = 0; I < N; ++I)
PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
// would take one instruction to materialize the v2i64 zero and one store (with
// a restrictive addressing mode), so just do i64 stores instead.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return MVT::v2i64;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat =
!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. Below that, it
// would take one instruction to materialize the v2i64 zero and one store (with
// a restrictive addressing mode), so just do i64 stores instead.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
// 12-bit optionally shifted immediates are legal for adds.
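// For example, 0xabc and 0xabc000 (0xabc shifted left by 12) are legal add
// immediates, whereas 0xabc0 is not.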
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
<< ": avoid UB for INT64_MIN\n");
return false;
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
bool IsLegal = ((Immed >> 12) == 0 ||
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
}
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
return isLegalAddImmediate(Immed);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
// reg + SIZE_IN_BYTES * 12-bit unsigned offset
// reg1 + reg2
// reg + SIZE_IN_BYTES * reg
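// For example, for an i64 access (SIZE_IN_BYTES = 8) the scaled unsigned
// immediate form covers byte offsets 0, 8, ..., 8 * 4095 = 32760.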
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
// FIXME: Update this method to support scalable addressing modes.
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
return AM.HasBaseReg && !AM.BaseOffs &&
(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
}
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> shift) << shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
}
InstructionCost AArch64TargetLowering::getScalingFactorCost(
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm] | 4
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, so charge a cost of 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
}
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
static const MCPhysReg ScratchRegs[] = {
AArch64::X16, AArch64::X17, AArch64::LR, 0
};
return ScratchRegs;
}
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
isa<ConstantSDNode>(N->getOperand(1))) {
uint64_t TruncMask = N->getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
N->getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
return false;
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return false;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
return true;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
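// For example (illustrative): 0x0000123400005678 has its highest set bit
// below bit 48 (Shift == 2), so it is materialized with MOVZ/MOVK rather than
// loaded from a constant pool, whereas 0x1234000000000000 (Shift == 3) is
// left as a load.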
return Shift < 3;
}
bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// cmge X, X, #0
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!Subtarget->hasNEON() || !VT.isVector())
return SDValue();
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ABD and UADDLP.
//
// i32 vecreduce_add(
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
// v8i16 add(
// v8i16 zext(
// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
// v8i16 zext(
// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
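// In C terms this is roughly a 16-byte sum-of-absolute-differences loop
// (illustrative):
//   for (int i = 0; i < 16; ++i)
//     Sum += abs((int)a[i] - (int)b[i]);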
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
SelectionDAG &DAG) {
// Assumed i32 vecreduce_add
if (N->getValueType(0) != MVT::i32)
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
unsigned Opcode = VecReduceOp0.getOpcode();
// Assumed v16i32 abs
if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue ABS = VecReduceOp0;
// Assumed v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
// Assumed v16i32 type
if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
return SDValue();
// Assumed zext or sext
bool IsZExt = false;
if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
IsZExt = true;
} else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
IsZExt = false;
} else
return SDValue();
SDValue EXT0 = SUB->getOperand(0);
SDValue EXT1 = SUB->getOperand(1);
// Assumed zext's operand has v16i8 type
if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
return SDValue();
// Pattern is detected. Let's convert it to a sequence of nodes.
SDLoc DL(N);
// First, create the node pattern of UABD/SABD.
SDValue UABDHigh8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDHigh8Op0, UABDHigh8Op1);
SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
// Second, create the node pattern of UABAL.
SDValue UABDLo8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDLo8Op0, UABDLo8Op1);
SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
// Third, create the node of UADDLP.
SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
// Fourth, create the node of VECREDUCE_ADD.
return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
if (N->getValueType(0) != MVT::i32 ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
unsigned ExtOpcode = Op0.getOpcode();
SDValue A = Op0;
SDValue B;
if (ExtOpcode == ISD::MUL) {
A = Op0.getOperand(0);
B = Op0.getOperand(1);
if (A.getOpcode() != B.getOpcode() ||
A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
return SDValue();
ExtOpcode = A.getOpcode();
}
if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
// For non-mla reductions B can be set to 1. For MLA we take the operand of
// the extend B.
if (!B)
B = DAG.getConstant(1, DL, Op0VT);
else
B = B.getOperand(0);
SDValue Zeros =
DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
A.getOperand(0), B);
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
// fold (sdiv X, pow2)
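// For example (illustrative), an i32 sdiv by 8 becomes roughly:
//   cmp  w0, #0
//   add  w8, w0, #7
//   csel w8, w8, w0, lt
//   asr  w0, w8, #3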
EVT VT = N->getValueType(0);
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
case Intrinsic::aarch64_sve_cnth:
case Intrinsic::aarch64_sve_cntw:
case Intrinsic::aarch64_sve_cntd:
return true;
}
return false;
}
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
/// \param DAG The SelectionDAG hosting the \p Extend node
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return Extend.getOperand(0).getValueType();
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::SIGN_EXTEND_INREG: {
VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
if (!TypeNode)
return MVT::Other;
return TypeNode->getVT();
}
case ISD::AND: {
ConstantSDNode *Constant =
dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
if (!Constant)
return MVT::Other;
uint32_t Mask = Constant->getZExtValue();
if (Mask == UCHAR_MAX)
return MVT::i8;
else if (Mask == USHRT_MAX)
return MVT::i16;
else if (Mask == UINT_MAX)
return MVT::i32;
return MVT::Other;
}
default:
return MVT::Other;
}
llvm_unreachable("Code path unhandled in calculatePreExtendType!");
}
/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
SelectionDAG &DAG) {
ShuffleVectorSDNode *ShuffleNode =
dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
if (!ShuffleNode)
return SDValue();
// Ensure the shuffle mask is all-zero (a splat of lane 0) before continuing
if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
return SDValue();
SDValue InsertVectorElt = VectorShuffle.getOperand(0);
if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
SDValue InsertLane = InsertVectorElt.getOperand(2);
ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
// Ensures the insert is inserting into lane 0
if (!Constant || Constant->getZExtValue() != 0)
return SDValue();
SDValue Extend = InsertVectorElt.getOperand(1);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
EVT TargetType = VectorShuffle.getValueType();
EVT PreExtendType = calculatePreExtendType(Extend, DAG);
if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
TargetType != MVT::v2i64) ||
(PreExtendType == MVT::Other))
return SDValue();
// Restrict valid pre-extend data type
if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
PreExtendType != MVT::i32)
return SDValue();
EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
return SDValue();
if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
return SDValue();
SDLoc DL(VectorShuffle);
SDValue InsertVectorNode = DAG.getNode(
InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
DAG.getUNDEF(PreExtendVT), ShuffleMask);
SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, TargetType, VectorShuffleNode);
return ExtendNode;
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
if (!Mul->getValueType(0).isVector())
return SDValue();
SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
// Neither operand has been changed, so don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
return Ext;
if (DCI.isBeforeLegalizeOps())
return SDValue();
// The below optimizations require a constant RHS.
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
SDValue N0 = N->getOperand(0);
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling from being obscured here. This makes it easier to pattern match.
if (IsSVECntIntrinsic(N0) ||
(N0->getOpcode() == ISD::TRUNCATE &&
(IsSVECntIntrinsic(N0->getOperand(0)))))
if (ConstValue.sge(1) && ConstValue.sle(16))
return SDValue();
// Multiplication of a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
// More aggressively, some multiplications N0 * C can be lowered to
// shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
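// For example (illustrative), a multiply by 6 (6 == (2+1)*2) becomes roughly:
//   add w8, w0, w0, lsl #1   ; w8 = 3*w0
//   lsl w0, w8, #1           ; w0 = 6*w0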
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
isZeroExtended(N0.getNode(), DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
N->use_begin()->getOpcode() == ISD::SUB))
return SDValue();
}
// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
unsigned ShiftAmt, AddSubOpc;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
bool NegateResult = false;
if (ConstValue.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt CVPlus1 = ConstValue + 1;
if (SCVMinus1.isPowerOf2()) {
ShiftAmt = SCVMinus1.logBase2();
AddSubOpc = ISD::ADD;
} else if (CVPlus1.isPowerOf2()) {
ShiftAmt = CVPlus1.logBase2();
AddSubOpc = ISD::SUB;
} else
return SDValue();
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt CVNegPlus1 = -ConstValue + 1;
APInt CVNegMinus1 = -ConstValue - 1;
if (CVNegPlus1.isPowerOf2()) {
ShiftAmt = CVNegPlus1.logBase2();
AddSubOpc = ISD::SUB;
ShiftValUseIsN0 = false;
} else if (CVNegMinus1.isPowerOf2()) {
ShiftAmt = CVNegMinus1.logBase2();
AddSubOpc = ISD::ADD;
NegateResult = true;
} else
return SDValue();
}
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
assert(!(NegateResult && TrailingZeroes) &&
"NegateResult and TrailingZeroes cannot both be true for now.");
// Negate the result.
if (NegateResult)
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
// Shift the result.
if (TrailingZeroes)
return DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(TrailingZeroes, DL, MVT::i64));
return Res;
}
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize the operation away when its input comes from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
// conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
unsigned Opcode =
(N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
return SDValue();
}
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
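/// For example (illustrative), (fptosi (fmul v4f32 X, splat 16.0)) can be
/// selected as a single "fcvtzs v0.4s, v0.4s, #4", i.e. a conversion with four
/// fractional bits, instead of an fmul followed by an fcvtzs.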
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
if (!N->getValueType(0).isSimple())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
Op.getOpcode() != ISD::FMUL)
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., float -> i64).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t Bits = IntBits == 64 ? 64 : 32;
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
if (C == -1 || C == 0 || C > Bits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
return FixConv;
}
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
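/// For example (illustrative), (fdiv (sitofp v4i32 X), splat 16.0) can be
/// selected as a single "scvtf v0.4s, v0.4s, #4", i.e. a conversion with four
/// fractional bits, instead of an scvtf followed by an fdiv.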
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
!Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
int32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
int32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
if (IntBits < FloatBits)
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
ResTy, ConvInput);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
: Intrinsic::aarch64_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
DAG.getConstant(C, DL, MVT::i32));
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.
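/// For example (illustrative), on i64 values:
///   (or (shl X, #12), (srl Y, #52))  ==>  EXTR Xd, X, Y, #52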
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
if (!VT.isVector())
return SDValue();
// The combining code currently only works for NEON vectors. In particular,
// it does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
// InstCombine does (not (neg a)) => (add a -1).
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
// Loop over all combinations of AND operands.
for (int i = 1; i >= 0; --i) {
for (int j = 1; j >= 0; --j) {
SDValue O0 = N0->getOperand(i);
SDValue O1 = N1->getOperand(j);
SDValue Sub, Add, SubSibling, AddSibling;
// Find a SUB and an ADD operand, one from each AND.
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
Sub = O0;
Add = O1;
SubSibling = N0->getOperand(1 - i);
AddSibling = N1->getOperand(1 - j);
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
Add = O0;
Sub = O1;
AddSibling = N0->getOperand(1 - i);
SubSibling = N1->getOperand(1 - j);
} else
continue;
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
continue;
// The constant all-ones vector is always the right-hand operand of the Add.
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
continue;
if (Sub.getOperand(1) != Add.getOperand(0))
continue;
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
}
}
// (or (and a b) (and (not a) c)) => (bsl a b c)
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
continue;
bool FoundMatch = true;
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
FoundMatch = false;
break;
}
}
if (FoundMatch)
return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
return SDValue();
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
if (SDValue Res = tryCombineToBSL(N, DCI))
return Res;
return SDValue();
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
if (!MemVT.getVectorElementType().isSimple())
return false;
uint64_t MaskForTy = 0ull;
switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xffull;
break;
case MVT::i16:
MaskForTy = 0xffffull;
break;
case MVT::i32:
MaskForTy = 0xffffffffull;
break;
default:
return false;
break;
}
if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
return false;
}
static SDValue performSVEAndCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Zero/any extend of an unsigned unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
if (Dup.getOpcode() != AArch64ISD::DUP)
return SDValue();
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
(ExtVal == 0xFFFF && EltTy == MVT::i16) ||
(ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
return Src;
// Truncate to prevent a DUP with an over-wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
Dup = DAG.getNode(AArch64ISD::DUP, DL,
UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
if (!EnableCombineMGatherIntrinsics)
return SDValue();
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
EVT MemVT;
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
case AArch64ISD::LDNF1_MERGE_ZERO:
case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLDFF1_MERGE_ZERO:
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
return SDValue();
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
EVT VT = N->getValueType(0);
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
if (!(VT.is64BitVector() || VT.is128BitVector()))
return SDValue();
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
return SDValue();
// AND does not accept an immediate, so check if we can use a BIC immediate
// instruction instead. We do this here instead of using a (and x, (mvni imm))
// pattern in isel, because some immediates may be lowered to the preferred
// (and x, (movi imm)) form, even though an mvni representation also exists.
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
UndefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)))
return NewOp;
}
return SDValue();
}
static SDValue performSRLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
// high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
// to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
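// For example (illustrative), if the top 16 bits of a 32-bit x are known zero,
// (srl (bswap x), 16) and (rotr (bswap x), 16) produce the same value (the low
// two bytes of x swapped), and the rotr form can typically be matched to a
// single REV16 by the existing ISel patterns.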
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() == ISD::BSWAP) {
SDLoc DL(N);
SDValue N1 = N->getOperand(1);
SDValue N00 = N0.getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::i32 && ShiftAmt == 16 &&
DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
if (VT == MVT::i64 && ShiftAmt == 32 &&
DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
}
}
return SDValue();
}
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
// The original form of the first expression is
// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
// Before this function is called the srl will have been lowered to
// AArch64ISD::VLSHR.
// This pass can also recognize signed variants of the patterns that use sign
// extension instead of zero extension and form a srhadd(OpA, OpB) or a
// shadd(OpA, OpB) from them.
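// In C terms (illustrative), for unsigned bytes this recognises
//   r = (a + b + 1) >> 1;   ; urhadd
//   r = (a + b) >> 1;       ; uhadd
// computed without losing the carry bit, and likewise for the signed forms.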
static SDValue
performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since we are looking for a right shift by a constant value of 1 and we are
// operating on types at least 16 bits in length (sign/zero extended OpA and
// OpB, which are at least 8 bits), it follows that the truncate will always
// discard the shifted-in bit and therefore the right shift will be logical
// regardless of the signedness of OpA and OpB.
SDValue Shift = N->getOperand(0);
if (Shift.getOpcode() != AArch64ISD::VLSHR)
return SDValue();
// Is the right shift using an immediate value of 1?
uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
if (ShiftAmount != 1)
return SDValue();
SDValue ExtendOpA, ExtendOpB;
SDValue ShiftOp0 = Shift.getOperand(0);
unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
if (ShiftOp0Opc == ISD::SUB) {
SDValue Xor = ShiftOp0.getOperand(1);
if (Xor.getOpcode() != ISD::XOR)
return SDValue();
// Is the XOR using an all-ones constant on the right-hand side?
uint64_t C;
if (!isAllConstantBuildVector(Xor.getOperand(1), C))
return SDValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
APInt CAsAPInt(ElemSizeInBits, C);
if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
return SDValue();
ExtendOpA = Xor.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(0);
} else if (ShiftOp0Opc == ISD::ADD) {
ExtendOpA = ShiftOp0.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(1);
} else
return SDValue();
unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
if (!(ExtendOpAOpc == ExtendOpBOpc &&
(ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
return SDValue();
// Is the result of the right shift being truncated to the same value type as
// the original operands, OpA and OpB?
SDValue OpA = ExtendOpA.getOperand(0);
SDValue OpB = ExtendOpB.getOperand(0);
EVT OpAVT = OpA.getValueType();
assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
return SDValue();
SDLoc DL(N);
bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
bool IsRHADD = ShiftOp0Opc == ISD::SUB;
unsigned HADDOpc = IsSignExtend
? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
return ResultHADD;
}
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::FADD:
return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
case ISD::ADD:
return VT == MVT::i64;
default:
return false;
}
}
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
// (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
// And handle the commutative case.
if (!Shuffle) {
Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
Other = N01;
}
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(0, DL, MVT::i64)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(1, DL, MVT::i64)));
}
}
return SDValue();
}
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
// (v2i16 (truncate (v2i64)))))
// ->
// (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
// (v4i32 (bitcast (v2i64))),
// <0, 2, 4, 6>)))
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
if (N00VT == N10.getValueType() &&
(N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
for (size_t i = 0; i < Mask.size(); ++i)
Mask[i] = i * 2;
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getVectorShuffle(
MidVT, dl,
DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
}
}
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
// subvectors from the same original vectors. Combine these into a single
// [us]rhadd or [us]hadd that operates on the two original vectors. Example:
// (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB,
// <0>))),
// (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
// extract_subvector (v16i8 OpB,
// <8>)))))
// ->
// (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
(N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
EVT N00VT = N00.getValueType();
EVT N10VT = N10.getValueType();
if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
SDValue N00Source = N00->getOperand(0);
SDValue N01Source = N01->getOperand(0);
SDValue N10Source = N10->getOperand(0);
SDValue N11Source = N11->getOperand(0);
if (N00Source == N10Source && N01Source == N11Source &&
N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
assert(N0.getValueType() == N1.getValueType());
uint64_t N00Index = N00.getConstantOperandVal(1);
uint64_t N01Index = N01.getConstantOperandVal(1);
uint64_t N10Index = N10.getConstantOperandVal(1);
uint64_t N11Index = N11.getConstantOperandVal(1);
if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
N10Index == N00VT.getVectorNumElements())
return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
}
}
}
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// bit-casts as possible before its real operation. The primary matching
// destination for these operations will be the narrowing "2" instructions,
// which depend on the operation being performed on this right-hand vector.
// For example,
// (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
if (N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
return SDValue();
LLVM_DEBUG(
dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Wait until after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Transform a scalar conversion of a value from a lane extract into a
// lane extract of a vector conversion. E.g., from foo1 to foo2:
// double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
// double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
//
// The second form interacts better with instruction selection and the
// register allocator to avoid cross-class register copies that aren't
// coalescable due to a lane reference.
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// Yep, no additional predication needed. Perform the transform.
SDValue IID = N->getOperand(0);
SDValue Shift = N->getOperand(2);
SDValue Vec = Op1.getOperand(0);
SDValue Lane = Op1.getOperand(1);
EVT ResTy = N->getValueType(0);
EVT VecResTy;
SDLoc DL(N);
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// done so).
assert(Vec.getValueSizeInBits() == 128 &&
"unexpected vector size on extract_vector_elt!");
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
return SDValue();
}
// AArch64 high-vector "long" operations are formed by performing the non-high
// version on an extract_subvector of each operand which gets the high half:
//
// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
//
// However, there are cases which don't have an extract_high explicitly, but
// have another operation that can be made compatible with one for free. For
// example:
//
// (dupv64 scalar) --> (extract_high (dup128 scalar))
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
// if you passed a bitcast'd floating-point immediate to an eligible long
// integer op (addl, smull, ...).
return SDValue();
}
MVT NarrowTy = N.getSimpleValueType();
if (!NarrowTy.is64BitVector())
return SDValue();
MVT ElementTy = NarrowTy.getVectorElementType();
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
if (N.getOpcode() == ISD::BITCAST)
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ if (N.getOperand(0).getValueType().isScalableVector())
+ return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
struct GenericSetCCInfo {
const SDValue *Opnd0;
const SDValue *Opnd1;
ISD::CondCode CC;
};
/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
struct AArch64SetCCInfo {
const SDValue *Cmp;
AArch64CC::CondCode CC;
};
/// Helper structure to keep track of SetCC information.
union SetCCInfo {
GenericSetCCInfo Generic;
AArch64SetCCInfo AArch64;
};
/// Helper structure to be able to read SetCC information. If the IsAArch64
/// field is set to true, Info is an AArch64SetCCInfo, otherwise Info is a
/// GenericSetCCInfo.
struct SetCCInfoAndKind {
SetCCInfo Info;
bool IsAArch64;
};
/// Check whether or not \p Op is a SET_CC operation, either a generic or an
/// AArch64 lowered one.
/// \p SetCCInfo is filled accordingly.
/// \post SetCCInfo is meaningful only when this function returns true.
/// \return True when Op is a kind of SET_CC operation.
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// If this is a setcc, this is straightforward.
if (Op.getOpcode() == ISD::SETCC) {
SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SetCCInfo.IsAArch64 = false;
return true;
}
// Otherwise, check if this is a matching csel instruction.
// In other words:
// - csel 1, 0, cc
// - csel 0, 1, !cc
if (Op.getOpcode() != AArch64ISD::CSEL)
return false;
// Set the information about the operands.
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// Check that the operands match the constraints:
// (1) Both operands must be constants.
// (2) One must be 1 and the other must be 0.
ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Check (1).
if (!TValue || !FValue)
return false;
// Check (2).
if (!TValue->isOne()) {
// Update the comparison when we are interested in !cc.
std::swap(TValue, FValue);
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
return TValue->isOne() && FValue->isNullValue();
}
// Returns true if Op is setcc or zext of setcc.
static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
// (add x, [zext] (setcc cc ...) )
// -->
// (csel x, (add x, 1), !cc ...)
//
// The latter will get matched to a CSINC instruction.
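// For example (illustrative), "x + (a < b ? 1 : 0)" becomes roughly:
//   cmp  w1, w2
//   cinc w0, w0, lt          ; alias of csinc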
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
SDValue LHS = Op->getOperand(0);
SDValue RHS = Op->getOperand(1);
SetCCInfoAndKind InfoAndKind;
// If both operands are a SET_CC, then we don't want to perform this
// folding and create another csel as this results in more instructions
// (and higher register usage).
if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
isSetCCOrZExtSetCC(RHS, InfoAndKind))
return SDValue();
// If neither operand is a SET_CC, give up.
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
std::swap(LHS, RHS);
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
return SDValue();
}
// FIXME: This could be generalized to work for FP comparisons.
EVT CmpVT = InfoAndKind.IsAArch64
? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
: InfoAndKind.Info.Generic.Opnd0->getValueType();
if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
return SDValue();
SDValue CCVal;
SDValue Cmp;
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(
*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only scalar integer and vector types.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
return SDValue();
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
SDValue Op2 = RHS->getOperand(0);
EVT OpVT1 = Op1.getValueType();
EVT OpVT2 = Op2.getValueType();
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
Op2.getOpcode() != AArch64ISD::UADDV ||
OpVT1.getVectorElementType() != VT)
return SDValue();
SDValue Val1 = Op1.getOperand(0);
SDValue Val2 = Op2.getOperand(0);
EVT ValVT = Val1->getValueType(0);
SDLoc DL(N);
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
DAG.getConstant(0, DL, MVT::i64));
}
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::ADD)
return SDValue();
SDValue Dot = N->getOperand(0);
SDValue A = N->getOperand(1);
// Handle commutativity
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
if (!isZeroDot(Dot))
return SDValue();
return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
Dot.getOperand(2));
}
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
//
// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM
//
// However, if one of the extracts is something like a duplicate, this
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
static SDValue performAddSubLongCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);
return SDValue();
}
// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
LHS.getOpcode() != ISD::SIGN_EXTEND) ||
LHS.getOpcode() != RHS.getOpcode())
return SDValue();
unsigned ExtType = LHS.getOpcode();
// It's not worth doing if at least one of the inputs isn't already an
// extract, but we don't know which it'll be so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
if (SDValue Val = performUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec))
//                     (extract_high (v2i64 (dup128 scalar))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
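// When called for a plain node the data operands are at positions 0 and 1; for
// an intrinsic node operand 0 is the intrinsic ID, so they are shifted by one.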
SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
// Either node could be a DUP, but it's not worth doing both of them (you'd
// just as well use the non-high version) so look for a corresponding extract
// operation on the other "wing".
if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
} else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
}
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
MVT ElemTy = N->getSimpleValueType(0).getScalarType();
unsigned ElemBits = ElemTy.getSizeInBits();
int64_t ShiftAmount;
if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, ElemBits) ||
SplatBitSize != ElemBits)
return SDValue();
ShiftAmount = SplatValue.getSExtValue();
} else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
ShiftAmount = CVN->getSExtValue();
} else
return SDValue();
unsigned Opcode;
bool IsRightShift;
switch (IID) {
default:
llvm_unreachable("Unknown shift intrinsic");
case Intrinsic::aarch64_neon_sqshl:
Opcode = AArch64ISD::SQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_uqshl:
Opcode = AArch64ISD::UQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_srshl:
Opcode = AArch64ISD::SRSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_urshl:
Opcode = AArch64ISD::URSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_sqshlu:
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
// left shift for positive shift amounts. Below, we only replace the current
// node with VSHL if this condition is met.
Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
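// Right-shift intrinsics encode their shift as a negative amount; translate it
// into the positive immediate expected by the shift-right-immediate nodes.
// Left shifts are emitted directly when the amount is within range.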
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, dl, MVT::i32));
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, dl, MVT::i32));
}
return SDValue();
}
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, there's almost
// certainly going to be a zext in the DAG which we can eliminate.
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
SDValue AndN = N->getOperand(2);
if (AndN.getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
if (!CMask || CMask->getZExtValue() != Mask)
return SDValue();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
DAG.getNode(Opc, dl,
N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
EVT ScalarTy = Op2.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
// Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Scalar = N->getOperand(3);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
SDValue Passthru = N->getOperand(1);
SDValue Pred = N->getOperand(2);
return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && "Expected a scalable vector.");
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
EVT ByteVT =
EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e. bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
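// Scale the element index to a byte index, since EXT indexes in bytes.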
SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
DAG.getConstant(ElemSize, dl, MVT::i32));
SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
return SDValue();
SDValue Comparator = N->getOperand(3);
if (Comparator.getOpcode() == AArch64ISD::DUP ||
Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
unsigned IID = getIntrinsicID(N);
EVT VT = N->getValueType(0);
EVT CmpVT = N->getOperand(2).getValueType();
SDValue Pred = N->getOperand(1);
SDValue Imm;
SDLoc DL(N);
switch (IID) {
default:
llvm_unreachable("Called with wrong intrinsic!");
break;
// Signed comparisons
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
int64_t ImmVal = CN->getSExtValue();
if (ImmVal >= -16 && ImmVal <= 15)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
// Unsigned comparisons
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
}
if (!Imm)
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(Op);
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
// Ensure target-specific opcodes use a legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
// NOTE: The integer reduction's result type is not always linked to the
// operand's element type so we construct it from the intrinsic's result type.
EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
EVT ReduceVT = VecToReduce.getValueType();
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue InitVal = N->getOperand(2);
SDValue VecToReduce = N->getOperand(3);
EVT ReduceVT = VecToReduce.getValueType();
// Ordered reductions use the first lane of the result vector as the
// reduction's initial value.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
DAG.getUNDEF(ReduceVT), InitVal, Zero);
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static bool isAllActivePredicate(SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
N = N.getOperand(0);
// When reinterpreting from a type with fewer elements the "new" elements
// are not active, so bail if they're likely to be used.
if (N.getValueType().getVectorMinNumElements() < NumElts)
return false;
}
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
if (N.getOpcode() == AArch64ISD::PTRUE &&
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
return false;
}
// If a merged operation has no inactive lanes we can relax it to a predicated
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SelectionDAG &DAG,
bool UnpredOp = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
// ISD way to specify an all active predicate.
if (isAllActivePredicate(Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
N->getOperand(3));
else
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
N->getOperand(2), N->getOperand(3));
}
// FUTURE: SplatVector(true)
return SDValue();
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);
switch (IID) {
default:
break;
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
case Intrinsic::aarch64_neon_sminv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
case Intrinsic::aarch64_neon_uminv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
case Intrinsic::aarch64_neon_smaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fminnm:
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
case Intrinsic::aarch64_sve_saddv:
// There is no i64 version of SADDV because the sign is irrelevant.
if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
else
return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
case Intrinsic::aarch64_sve_uaddv:
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_mul:
return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_smulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_umulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_smin:
return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
case Intrinsic::aarch64_sve_uqsub:
return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:
return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
(N->getOperand(0).getOpcode() == ISD::ABDU ||
N->getOperand(0).getOpcode() == ISD::ABDS)) {
SDNode *ABDNode = N->getOperand(0).getNode();
SDValue NewABD =
tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
if (!NewABD.getNode())
return SDValue();
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
// split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
// As this is in ISel, we will not merge this add, which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(BasePtr->getOperand(1))) {
BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
BasePtr = BasePtr->getOperand(0);
}
unsigned Offset = EltOffset;
while (--NumVecElts) {
unsigned Alignment = MinAlign(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
return NewST1;
}
// Returns an SVE type that ContentTy can be trivially sign or zero extended
// into.
static MVT getSVEContainerType(EVT ContentTy) {
assert(ContentTy.isSimple() && "No SVE containers for extended types");
switch (ContentTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("No known SVE container for this MVT type");
case MVT::nxv2i8:
case MVT::nxv2i16:
case MVT::nxv2i32:
case MVT::nxv2i64:
case MVT::nxv2f32:
case MVT::nxv2f64:
return MVT::nxv2i64;
case MVT::nxv4i8:
case MVT::nxv4i16:
case MVT::nxv4i32:
case MVT::nxv4f32:
return MVT::nxv4i32;
case MVT::nxv8i8:
case MVT::nxv8i16:
case MVT::nxv8f16:
case MVT::nxv8bf16:
return MVT::nxv8i16;
case MVT::nxv16i8:
return MVT::nxv16i8;
}
}
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
EVT ContainerVT = VT;
if (ContainerVT.isInteger())
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
SDValue Ops[] = { N->getOperand(0), // Chain
N->getOperand(2), // Pg
N->getOperand(3), // Base
DAG.getValueType(VT) };
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
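// If the value was widened to its SVE container type, truncate it back down
// to the requested type.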
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
return DAG.getMergeValues({ Load, LoadChain }, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
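// Lower the non-temporal load as a regular masked load with a zero passthru;
// operand 3 is the base address and operand 2 the governing predicate.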
SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
MINode->getOperand(2), PassThru,
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
if (VT.isFloatingPoint()) {
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
return DAG.getMergeValues(Ops, DL);
}
return L;
}
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (VT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
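// Floating-point data is bitcast to the integer container type; integer data
// is any-extended to fill the container.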
SDValue SrcNew;
if (Data.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
SDValue Ops[] = { N->getOperand(0), // Chain
SrcNew,
N->getOperand(4), // Base
N->getOperand(3), // Pg
InputVT
};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
auto *MINode = cast<MemIntrinsicSDNode>(N);
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, false, false);
}
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
/// load store optimizer pass will merge them to store pair stores. This should
/// be better than a movi to create the vector zero followed by a vector store
/// if the zero constant is not re-used, since one instruction and one register
/// live range will be removed.
///
/// For example, the final generated code should be:
///
/// stp xzr, xzr, [x0]
///
/// instead of:
///
/// movi v0.2d, #0
/// str q0, [x0]
///
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Avoid scalarizing zero splat stores for scalable vectors.
if (VT.isScalableVector())
return SDValue();
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
if (!(((NumVecElts == 2 || NumVecElts == 3) &&
VT.getVectorElementType().getSizeInBits() == 64) ||
((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
VT.getVectorElementType().getSizeInBits() == 32)))
return SDValue();
if (StVal.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// If the zero constant has more than one use then the vector store could be
// better since the constant mov will be amortized and stp q instructions
// should be able to be formed.
if (!StVal.hasOneUse())
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
if (Offset < -512 || Offset > 504)
return SDValue();
}
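// Every element of the build_vector must be a zero constant (integer or FP).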
for (int I = 0; I < NumVecElts; ++I) {
SDValue EltVal = StVal.getOperand(I);
if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
return SDValue();
}
// Use a CopyFromReg WZR/XZR here to prevent
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
SDLoc DL(&St);
unsigned ZeroReg;
EVT ZeroVT;
if (VT.getVectorElementType().getSizeInBits() == 32) {
ZeroReg = AArch64::WZR;
ZeroVT = MVT::i32;
} else {
ZeroReg = AArch64::XZR;
ZeroVT = MVT::i64;
}
SDValue SplatVal =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
/// value. The load store optimizer pass will merge them to store pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged it is four stores vs a dup,
/// followed by an ext.b and two stores.
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores, they possibly won't be transformed to
// stp because of the store pair suppress pass.
if (VT.isFloatingPoint())
return SDValue();
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
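// We track this with a bitset: start with the low NumVecElts bits set and
// clear the bit for each lane an insert is found for.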
std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
SDValue SplatVal;
for (unsigned I = 0; I < NumVecElts; ++I) {
// Check for insert vector elements.
if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
// Check that same value is inserted at each vector element.
if (I == 0)
SplatVal = StVal.getOperand(1);
else if (StVal.getOperand(1) != SplatVal)
return SDValue();
// Check insert element index.
ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
if (!CIndex)
return SDValue();
uint64_t IndexVal = CIndex->getZExtValue();
if (IndexVal >= NumVecElts)
return SDValue();
IndexNotInserted.reset(IndexVal);
StVal = StVal.getOperand(0);
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
// scalars. They will be merged into store pairs of xzr thereby removing one
// instruction and one register.
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
return ReplacedZeroSplat;
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
// Don't split stores with alignment of 1 or 2. Code that uses clang vector
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
S->getAlignment() <= 2)
return SDValue();
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
// Split VT into two.
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->getAlignment(), S->getMemOperand()->getFlags());
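// The second half of the 128-bit vector starts 8 bytes past the base pointer.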
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->getAlignment(),
S->getMemOperand()->getFlags());
}
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
// splice(pg, op1, undef) -> op1
if (N->getOperand(2).isUndef())
return N->getOperand(1);
return SDValue();
}
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue X = Op0.getOperand(0).getOperand(0);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
}
}
// uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue Z = Op1.getOperand(0).getOperand(1);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
}
}
return SDValue();
}
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
(Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
"Invalid opcode.");
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Pg = N->getOperand(1);
SDValue Base = N->getOperand(2);
SDValue Offset = N->getOperand(3);
SDValue Ty = N->getOperand(4);
EVT ResVT = N->getValueType(0);
const auto OffsetOpc = Offset.getOpcode();
const bool OffsetIsZExt =
OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
const bool OffsetIsSExt =
OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
// Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
SDValue ExtPg = Offset.getOperand(0);
VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
// If the predicate for the sign- or zero-extended offset is the
// same as the predicate used for this load and the sign-/zero-extension
// was from 32 bits...
if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
SDValue UnextendedOffset = Offset.getOperand(1);
unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
if (Signed)
NewOpc = getSignExtendedGatherOpcode(NewOpc);
return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
{Chain, Pg, Base, UnextendedOffset, Ty});
}
}
return SDValue();
}
/// Optimize a vector shift instruction and its operand if shifted out
/// bits are not used.
static SDValue performVectorShiftCombine(SDNode *N,
const AArch64TargetLowering &TLI,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::VASHR ||
N->getOpcode() == AArch64ISD::VLSHR);
SDValue Op = N->getOperand(0);
unsigned OpScalarSize = Op.getScalarValueSizeInBits();
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
bool IsLaneOp) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT.isScalableVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not a LOAD, we cannot do this combine.
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
// The vector lane must be a constant in the LD1LANE opcode.
SDValue Lane;
if (IsLaneOp) {
Lane = N->getOperand(2);
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
}
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
if (MemVT != VT.getVectorElementType())
return SDValue();
// Check if there are other uses. If so, do not combine as it will introduce
// an extra load.
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
++UI) {
if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
continue;
if (*UI != N)
return SDValue();
}
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = VT.getScalarSizeInBits() / 8;
if (IncVal != NumBytes)
continue;
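// An increment equal to the access size is represented with XZR, which lets
// isel select the immediate post-indexed form.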
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
// To avoid cycle construction make sure that neither the load nor the add
// are predecessors to each other or the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
LoadSDN->getMemOperand());
// Update the uses.
SDValue NewResults[] = {
SDValue(LD, 0), // The result of load
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
return SDValue();
}
/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
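// With top-byte-ignore only the low 56 bits of the address are demanded, so
// try to simplify the address computation under that mask.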
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
return true;
}
return false;
}
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
"Expected STORE dag node in input!");
if (auto Store = dyn_cast<StoreSDNode>(N)) {
if (!Store->isTruncatingStore() || Store->isIndexed())
return SDValue();
SDValue Ext = Store->getValue();
auto ExtOpCode = Ext.getOpcode();
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
if (Store->getMemoryVT() != Orig->getValueType(0))
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
Store->getBasePtr(), Store->getPointerInfo(),
Store->getAlign());
}
return SDValue();
}
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
performTBISimplification(N->getOperand(2), DCI, DAG))
return SDValue(N, 0);
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
return SDValue();
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(N);
Worklist.push_back(User);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
bool IsStore = false;
bool IsLaneOp = false;
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
NumVecs = 2; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
NumVecs = 3; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
NumVecs = 4; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
NumVecs = 2; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
NumVecs = 3; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
NumVecs = 4; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
EVT VecTy;
if (IsStore)
VecTy = N->getOperand(2).getValueType();
else
VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (IsLaneOp || IsDupOp)
NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // Incoming chain
// Lane operations and stores have a vector list as input.
if (IsLaneOp || IsStore)
for (unsigned i = 2; i < AddrOpIdx; ++i)
Ops.push_back(N->getOperand(i));
Ops.push_back(Addr); // Base register
Ops.push_back(Inc);
// Return Types.
EVT Tys[6];
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
// Checks to see if the value is the prescribed width and returns information
// about its extension mode.
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
switch(V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
|| (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
return false;
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
return false;
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
return false;
}
case ISD::Constant:
case ISD::TargetConstant: {
return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
1LL << (width - 1);
}
}
return true;
}
// This function does a whole lot of voodoo to determine if the tests are
// equivalent without and with a mask. Essentially what happens is that given a
// DAG resembling:
//
// +-------------+ +-------------+ +-------------+ +-------------+
// | Input | | AddConstant | | CompConstant| | CC |
// +-------------+ +-------------+ +-------------+ +-------------+
// | | | |
// V V | +----------+
// +-------------+ +----+ | |
// | ADD | |0xff| | |
// +-------------+ +----+ | |
// | | | |
// V V | |
// +-------------+ | |
// | AND | | |
// +-------------+ | |
// | | |
// +-----+ | |
// | | |
// V V V
// +-------------+
// | CMP |
// +-------------+
//
// The AND node may be safely removed for some combinations of inputs. In
// particular we need to take into account the extension type of the Input,
// the exact values of AddConstant, CompConstant, and CC, along with the nominal
// width of the input (this can work for any input width; the above graph is
// specific to 8 bits).
//
// The specific equations were worked out by generating output tables for each
// AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
// problem was simplified by working with 4 bit inputs, which means we only
// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
// patterns present in both extensions (0,7). For every distinct set of
// AddConstant and CompConstant bit patterns we can consider the masked and
// unmasked versions to be equivalent if the result of this function is true for
// all 16 distinct bit patterns for the current extension type of Input (w0).
//
// sub w8, w0, w1
// and w10, w8, #0x0f
// cmp w8, w2
// cset w9, AArch64CC
// cmp w10, w2
// cset w11, AArch64CC
// cmp w9, w11
// cset w0, eq
// ret
//
// Since the above function shows when the outputs are equivalent it defines
// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
// would be expensive to run during compiles. The equations below were written
// in a test harness that confirmed they gave outputs equivalent to the above
// function for all inputs, so they can be used to determine if the removal is
// legal instead.
//
// isEquivalentMaskless() is the code for testing if the AND can be removed,
// factored out of the DAG recognition since the DAG can take several forms.
static bool isEquivalentMaskless(unsigned CC, unsigned width,
ISD::LoadExtType ExtType, int AddConstant,
int CompConstant) {
// By being careful about our equations and only writing them in terms of
// symbolic values and well-known constants (0, 1, -1, MaxUInt) we can
// make them generally applicable to all bit widths.
int MaxUInt = (1 << width);
// For the purposes of these comparisons sign extending the type is
// equivalent to zero extending the add and displacing it by half the integer
// width. Provided we are careful and make sure our equations are valid over
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
AddConstant -= (1 << (width-1));
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::LT:
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
break;
case AArch64CC::PL:
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::LO:
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
break;
case AArch64CC::EQ:
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
case AArch64CC::NV:
return true;
case AArch64CC::Invalid:
break;
}
return false;
}
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG, unsigned CCIndex,
unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
if (CondOpcode != AArch64ISD::SUBS)
return SDValue();
// There is a SUBS feeding this condition. Is it fed by a mask we can
// use?
SDNode *AndNode = SubsNode->getOperand(0).getNode();
unsigned MaskBits = 0;
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
MaskBits = 8;
else if (CNV == 65535)
MaskBits = 16;
}
if (!MaskBits)
return SDValue();
SDValue AddValue = AndNode->getOperand(0);
if (AddValue.getOpcode() != ISD::ADD)
return SDValue();
// The basic dag structure is correct, grab the inputs and validate them.
SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
SDValue SubsInputValue = SubsNode->getOperand(1);
// The mask is present and the provenance of all the values is a smaller type,
// let's see if the mask is superfluous.
if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
!isa<ConstantSDNode>(SubsInputValue.getNode()))
return SDValue();
ISD::LoadExtType ExtType;
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue1, MaskBits, ExtType) )
return SDValue();
if (!isEquivalentMaskless(CC, MaskBits, ExtType,
cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
SubsNode->getValueType(1));
SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
return SDValue(N, 0);
}
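// As a sketch of the rewrite performed above (operand order simplified), a
// flag-setting node of the form
//   (SUBS (and (add x, C1), 0xff), C2)
// becomes
//   (SUBS (add x, C1), C2)
// once isEquivalentMaskless() proves that every condition read through
// CCIndex sees identical flags, so the masking AND becomes dead.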
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
return SDValue();
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
SDValue Cmp = N->getOperand(3);
assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
unsigned CmpOpc = Cmp.getOpcode();
if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
// Only attempt folding if there is only one use of the flag and no use of the
// value.
if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
return SDValue();
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected the value type to be the same for both operands!");
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
if (isNullConstant(LHS))
std::swap(LHS, RHS);
if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
LHS.getOpcode() == ISD::SRL)
return SDValue();
// Fold the compare into the branch instruction.
SDValue BR;
if (CC == AArch64CC::EQ)
BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
else
BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, BR, false);
return SDValue();
}
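// For example, assuming a value x whose only flag use is this branch,
//   (brcond (subs x, 0), eq)  is folded to  (cbz x, dest)
// and the NE form to (cbnz x, dest); the hasNUsesOfValue checks above make
// sure no other user still needs the value or the flags of the SUBS.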
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// CSEL x, x, cc -> x
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
return performCONDCombine(N, DCI, DAG, 2, 3);
}
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
SDLoc DL(N);
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
SDValue CSEL =
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
}
return SDValue();
}
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
// setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
// => inner setcc_merge_zero
if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
LHS->getOpcode() == ISD::SIGN_EXTEND &&
LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
if (Op->getOpcode() == ISD::ANY_EXTEND &&
Bit < Op->getOperand(0).getValueSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
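// For example, starting from (tbz (srl (and x, 0xff), 2), 1), the walk above
// first moves through the SRL (bit 1 becomes bit 3) and then through the AND
// (bit 3 of 0xff is set), leaving a plain (tbz x, 3) with no extra
// instructions emitted.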
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, there is no need to
// combine such a VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() == ISD::SETCC &&
SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
SDValue CmpLHS = SetCC.getOperand(0);
EVT VT = CmpLHS.getValueType();
SDNode *CmpRHS = SetCC.getOperand(1).getNode();
SDNode *SplatLHS = N->getOperand(1).getNode();
SDNode *SplatRHS = N->getOperand(2).getNode();
APInt SplatLHSVal;
if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
VT.isSimple() &&
is_contained(
makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
VT.getScalarType()));
SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
return Or;
}
}
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
EVT ResVT = N->getValueType(0);
EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
SDValue IfTrue = N->getOperand(1);
SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
static SDValue performSelectCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
if (ResVT.isScalableVector())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
"Scalar-SETCC feeding SELECT has unexpected result type!");
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
// There are no legal vectors of i1, so this would be pointless.
if (SrcVT == MVT::i1)
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// Also bail out if the vector CCVT isn't the same size as ResVT.
// This can happen if the SETCC operand size doesn't divide the ResVT size
// (e.g., f64 vs v3f32).
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
// Make sure we didn't create illegal types, if we're not supposed to.
assert(DCI.isBeforeLegalize() ||
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
Mask = DAG.getNode(ISD::BITCAST, DL,
ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
return SDValue();
}
// If all users of the globaladdr are of the form (globaladdr + constant), find
// the smallest constant, fold it into the globaladdr's offset and rewrite the
// globaladdr as (globaladdr + constant) - constant.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
auto *GN = cast<GlobalAddressSDNode>(N);
if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
for (SDNode *N : GN->uses()) {
if (N->getOpcode() != ISD::ADD)
return SDValue();
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
if (!C)
C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
MinOffset = std::min(MinOffset, C->getZExtValue());
}
uint64_t Offset = MinOffset + GN->getOffset();
// Require that the new offset is larger than the existing one. Otherwise, we
// can end up oscillating between two possible DAGs, for example,
// (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
if (Offset <= uint64_t(GN->getOffset()))
return SDValue();
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
// smaller than 2^21 because this is the largest offset expressible in all
// object formats.
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
if (Offset >= (1 << 21))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
if (!T->isSized() ||
Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
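// For example (offsets chosen for exposition), if a globaladdr G with offset
// 0 is used only as (add G, 8) and (add G, 12), then MinOffset == 8 and G is
// rewritten as (sub (globaladdr G + 8), 8); generic constant folding then
// turns the first user into the globaladdr itself and the second into
// (add (globaladdr G + 8), 4).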
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
SDLoc DL, unsigned BitWidth) {
assert(Offset.getValueType().isScalableVector() &&
"This method is only for scalable vectors of offsets");
SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
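// For example, with BitWidth == 32 the splatted shift amount is
// Log2_32(32 / 8) == 2, so each index i becomes the byte offset i * 4.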
/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;
// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;
return true;
}
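// For example, with ScalarSizeInBytes == 4 the valid immediates are
// 0, 4, 8, ..., 124, i.e. the multiples of 4 up to 31 * 4.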
/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indicies.
if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW_PRED;
else
Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to be
// able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
// FP values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
SDLoc DL(N);
// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indicies.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use
// GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_UXTW_MERGE_ZERO
: AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_MERGE_ZERO
: AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
// Keep the original output value type around - this is needed to be able to
// select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
// values we want the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
SDValue Ops[] = {N->getOperand(0), // Chain
N->getOperand(2), // Pg
Base, Offset, OutVT};
SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (RetVT.isInteger() && (RetVT != HwRetVt))
Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
// If the original return value was FP, bitcast accordingly. Doing it here
// means that we can avoid adding TableGen patterns for FPs.
if (RetVT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
: AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
// is another unpack:
// 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
// ->
// 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
// ->
// 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
SDValue ExtOp = Src->getOperand(0);
auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT EltTy = VT.getVectorElementType();
(void)EltTy;
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (!EnableCombineMGatherIntrinsics)
return SDValue();
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
unsigned MemVTOpNum = 4;
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDNF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::GLD1_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDNT1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
SmallVector<SDValue, 5> Ops;
for (unsigned I = 0; I < Src->getNumOperands(); ++I)
Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
// Return N so it doesn't get rechecked
return SDValue(N, 0);
}
/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
/// != nxv2i32) do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);
// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();
// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
/// Combines a node carrying the intrinsic
/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
/// SVE gather prefetch instruction with vector plus immediate addressing mode.
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();
// ...otherwise swap the offset base with the offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
// `aarch64_sve_prfb_gather_uxtw_index`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
MVT::i64);
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
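// For example (argument order shown only for illustration), a prefetch such
// as @llvm.aarch64.sve.prfw.gather.scalar.offset(pg, zbases, 3, prfop) has an
// offset of 3, which is not a multiple of the 4-byte element size, so the
// node is rewritten above to aarch64_sve_prfb_gather_uxtw_index with the base
// and offset operands swapped.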
// Return true if the vector operation can guarantee only the first lane of its
// result contains data, with all bits in other lanes set to zero.
static bool isLanes1toNKnownZero(SDValue Op) {
switch (Op.getOpcode()) {
default:
return false;
case AArch64ISD::ANDV_PRED:
case AArch64ISD::EORV_PRED:
case AArch64ISD::FADDA_PRED:
case AArch64ISD::FADDV_PRED:
case AArch64ISD::FMAXNMV_PRED:
case AArch64ISD::FMAXV_PRED:
case AArch64ISD::FMINNMV_PRED:
case AArch64ISD::FMINV_PRED:
case AArch64ISD::ORV_PRED:
case AArch64ISD::SADDV_PRED:
case AArch64ISD::SMAXV_PRED:
case AArch64ISD::SMINV_PRED:
case AArch64ISD::UADDV_PRED:
case AArch64ISD::UMAXV_PRED:
case AArch64ISD::UMINV_PRED:
return true;
}
}
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
SDValue InsertElt = N->getOperand(1);
SDValue InsertIdx = N->getOperand(2);
// We only care about inserts into the first element...
if (!isNullConstant(InsertIdx))
return SDValue();
// ...of a zero'd vector...
if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
return SDValue();
// ...where the inserted data was previously extracted...
if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue ExtractVec = InsertElt.getOperand(0);
SDValue ExtractIdx = InsertElt.getOperand(1);
// ...from the first element of a vector.
if (!isNullConstant(ExtractIdx))
return SDValue();
// If we get here we are effectively trying to zero lanes 1-N of a vector.
// Ensure there's no type conversion going on.
if (N->getValueType(0) != ExtractVec.getValueType())
return SDValue();
if (!isLanes1toNKnownZero(ExtractVec))
return SDValue();
// The explicit zeroing is redundant.
return ExtractVec;
}
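// For example, a pattern such as
//   (insert_vector_elt (splat 0), (extract_vector_elt (UADDV_PRED pg, z), 0), 0)
// is replaced by the UADDV_PRED node itself: per isLanes1toNKnownZero above,
// the predicated reduction already leaves lanes 1..N zeroed, so the explicit
// zeroing buys nothing.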
static SDValue
performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
return performPostLD1Combine(N, DCI, true);
}
SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
case ISD::SRL:
return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::TRUNCATE:
return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
return performSETCCCombine(N, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DAG);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLD1S_MERGE_ZERO:
case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
return performGLD1Combine(N, DAG);
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DAG);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r:
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Src1 = N->getOperand(2);
SDValue Idx = N->getOperand(3);
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Tuple = N->getOperand(2);
SDValue Idx = N->getOperand(3);
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
uint64_t NumLanes =
Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
uint64_t NumVecs = TupleLanes / NumLanes;
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 0; I < NumVecs; ++I) {
if (I == IdxConst)
Opnds.push_back(Vec);
else {
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_create2:
case Intrinsic::aarch64_sve_tuple_create3:
case Intrinsic::aarch64_sve_tuple_create4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 2; I < N->getNumOperands(); ++I)
Opnds.push_back(N->getOperand(I));
EVT VT = Opnds[0].getValueType();
EVT EltVT = VT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getVectorElementCount() *
(N->getNumOperands() - 2));
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_ld2:
case Intrinsic::aarch64_sve_ld3:
case Intrinsic::aarch64_sve_ld4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Mask = N->getOperand(2);
SDValue BasePtr = N->getOperand(3);
SDValue LoadOps[] = {Chain, Mask, BasePtr};
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
SDValue Result =
LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
default:
break;
}
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
// Check if the return value is used only as a return value, as otherwise
// we can't perform a tail-call. In particular, we need to check for
// target ISD nodes that are returns and any other "odd" constructs
// that the generic analysis code won't necessarily catch.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
// Return whether an instruction can potentially be optimized to a tail
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
bool &IsInc,
SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
Base = Op->getOperand(0);
// All of the indexed addressing mode instructions take a signed
// 9 bit immediate offset.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
if (Op->getOpcode() == ISD::SUB)
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
return true;
}
return false;
}
bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
bool AArch64TargetLowering::getPostIndexedAddressParts(
SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
}
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
unsigned AcrossOp) {
EVT LoVT, HiVT;
SDValue Lo, Hi;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
Results.push_back(SplitVal);
}
static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
DAG.getNode(ISD::SRL, DL, MVT::i128, N,
DAG.getConstant(64, DL, MVT::i64)));
return std::make_pair(Lo, Hi);
}
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// The following checks bail if this is not a halving operation.
ElementCount ResEC = VT.getVectorElementCount();
if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CIndex)
return;
unsigned Index = CIndex->getZExtValue();
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
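// For example, an i128 value V is split into lo = trunc(V) and
// hi = trunc(V >> 64), placed into the sube64/subo64 subregisters of a
// REG_SEQUENCE (swapped on big-endian), which is the even/odd X register
// pair layout expected by the CASP instructions used below.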
static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CMP_SWAP_128_RELEASE;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CMP_SWAP_128;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
case ISD::CTPOP:
if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
case AArch64ISD::UADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
return;
case AArch64ISD::SMINV:
ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
return;
case AArch64ISD::UMINV:
ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
return;
case AArch64ISD::SMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
return;
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
LoadSDNode *LoadNode = cast<LoadSDNode>(N);
if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
// Non-volatile loads are optimized later in AArch64's load/store
// optimizer.
return;
}
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDP, SDLoc(N),
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
case ISD::EXTRACT_SUBVECTOR:
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
// Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
// to common code for result type legalisation.
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default:
return;
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
}
}
}
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
return TargetLowering::useLoadStackGuardNode();
return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
return 3;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 instead of to promote.
if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
return Size == 128;
}
// Loads and stores less than 128 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
// it.
unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size > 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
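// Emit a load-linked for atomic expansion: LDXP/LDAXP for 128-bit values
// (the two halves are recombined into a single i128), otherwise LDXR/LDAXR
// of the appropriate width.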
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValueTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
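// Clear the exclusive monitor (CLREX) on the path where a cmpxchg expansion
// skips the store-conditional.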
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
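// Emit a store-conditional for atomic expansion: STXP/STLXP for 128-bit
// values (Val is split into two i64 halves), otherwise STXR/STLXR.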
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr});
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
if (!Ty->isArrayTy()) {
const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}
// All non-aggregate members of the type must have the same type.
SmallVector<EVT> ValueVTs;
ComputeValueVTs(*this, DL, Ty, ValueVTs);
return is_splat(ValueVTs);
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
EVT) const {
return false;
}
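// Return an i8** pointing at the given byte offset from the thread pointer;
// used for the fixed Android/Fuchsia TLS slots below.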
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the stack cookie. See the definition
// of TLS_SLOT_STACK_GUARD in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
return TargetLowering::getIRStackGuard(IRB);
}
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addAttribute(1, Attribute::AttrKind::InReg);
}
return;
}
TargetLowering::insertSSPDeclarations(M);
}
Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// Only sink 'and' mask to cmp use block if it is masking a single bit, since
// this likely allows the and/cmp/br to be folded into a single tbz instruction. It
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
}
bool AArch64TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// Else, if this is a vector shift, prefer 'shl'.
return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
AFI->setIsSplitCSR(true);
}
void AArch64TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AArch64::GPR64RegClass.contains(*I))
RC = &AArch64::GPR64RegClass;
else if (AArch64::FPR64RegClass.contains(*I))
RC = &AArch64::FPR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
// The exception to this is vector division. Since AArch64 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
return OptSize && !VT.isVector();
}
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
// We want inc-of-add for scalars and sub-of-not for vectors.
return VT.isScalarInteger();
}
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MF.getFrameInfo().computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
break;
}
// If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
return true;
default:
break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getType()))
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}
return false;
}
// Return the largest legal scalable vector type that matches VT's element type.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE container");
case MVT::i8:
return EVT(MVT::nxv16i8);
case MVT::i16:
return EVT(MVT::nxv8i16);
case MVT::i32:
return EVT(MVT::nxv4i32);
case MVT::i64:
return EVT(MVT::nxv2i64);
case MVT::f16:
return EVT(MVT::nxv8f16);
case MVT::f32:
return EVT(MVT::nxv4f32);
case MVT::f64:
return EVT(MVT::nxv2f64);
}
}
// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
int PgPattern;
switch (VT.getVectorNumElements()) {
default:
llvm_unreachable("unexpected element count for SVE predicate");
case 1:
PgPattern = AArch64SVEPredPattern::vl1;
break;
case 2:
PgPattern = AArch64SVEPredPattern::vl2;
break;
case 4:
PgPattern = AArch64SVEPredPattern::vl4;
break;
case 8:
PgPattern = AArch64SVEPredPattern::vl8;
break;
case 16:
PgPattern = AArch64SVEPredPattern::vl16;
break;
case 32:
PgPattern = AArch64SVEPredPattern::vl32;
break;
case 64:
PgPattern = AArch64SVEPredPattern::vl64;
break;
case 128:
PgPattern = AArch64SVEPredPattern::vl128;
break;
case 256:
PgPattern = AArch64SVEPredPattern::vl256;
break;
}
// TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
// use AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE predicate");
case MVT::i8:
MaskVT = MVT::nxv16i1;
break;
case MVT::i16:
case MVT::f16:
MaskVT = MVT::nxv8i1;
break;
case MVT::i32:
case MVT::f32:
MaskVT = MVT::nxv4i1;
break;
case MVT::i64:
case MVT::f64:
MaskVT = MVT::nxv2i1;
break;
}
return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
DAG.getTargetConstant(PgPattern, DL, MVT::i64));
}
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
if (VT.isFixedLengthVector())
return getPredicateForFixedLengthVector(DAG, DL, VT);
return getPredicateForScalableVector(DAG, DL, VT);
}
// Grow V to consume an entire SVE register.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<LoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
Load->getExtensionType());
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
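// Convert a fixed-length vector mask into an SVE predicate by comparing the
// mask (widened to its scalable container) against zero.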
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG) {
SDLoc DL(Mask);
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
return SDValue();
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
if (Load->getPassThru()->isUndef()) {
PassThru = DAG.getUNDEF(ContainerVT);
IsPassThruZeroOrUndef = true;
} else {
if (ContainerVT.isInteger())
PassThru = DAG.getConstant(0, DL, ContainerVT);
else
PassThru = DAG.getConstantFP(0, DL, ContainerVT);
if (isZerosVector(Load->getPassThru().getNode()))
IsPassThruZeroOrUndef = true;
}
auto NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
}
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
// Convert all fixed length vector stores larger than NEON to masked_stores.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<MaskedStoreSDNode>(Op);
if (Store->isTruncatingStore())
return SDValue();
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
Mask, Store->getMemoryVT(), Store->getMemOperand(),
Store->getAddressingMode(), Store->isTruncatingStore());
}
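// Lower fixed-length vector SDIV/UDIV to SVE. i32/i64 elements map directly
// onto the predicated DIV; i8/i16 elements are widened to i32 (by extension
// or unpacking), divided, and narrowed back.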
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
// If this is not a full vector, extend, div, and truncate it.
EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
}
// Convert the operands to scalable vectors.
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
// Extend the scalable operands.
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
// Convert back to fixed vectors so the DIV can be further lowered.
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Hi, Op1Hi);
// Convert again to scalable vectors to truncate.
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
ResultLo, ResultHi);
return convertFromScalableVector(DAG, VT, ScalableResult);
}
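// Lower a fixed-length vector sign/zero extension by repeatedly unpacking the
// low half (SUNPKLO/UUNPKLO) until the requested element width is reached.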
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
// Repeatedly unpack Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv16i8:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
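// Lower a fixed-length vector truncate by repeatedly halving the element
// width with UZP1 until the requested element type is reached.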
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
// Repeatedly truncate Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv2i64:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT InVT = Op.getOperand(0).getValueType();
assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
Op.getOperand(1), Op.getOperand(2));
return convertFromScalableVector(DAG, VT, ScalableRes);
}
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
unsigned NewOp,
bool OverrideNEON) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
Operands.push_back(V);
continue;
}
if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
EVT VTArg = VTNode->getVT().getVectorElementType();
EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
Operands.push_back(DAG.getValueType(NewVTArg));
continue;
}
assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
"Only fixed length vectors are supported!");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(ContainerVT));
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
return DAG.getNode(NewOp, DL, VT, Operands);
}
// If a fixed length vector operation has no side effects when applied to
// undefined elements, we can safely use scalable vectors to perform the same
// operation without needing to worry about predication.
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
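// Lower a strictly-ordered floating-point reduction (VECREDUCE_SEQ_FADD) to
// the predicated FADDA, with the accumulator inserted into lane 0 first.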
SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
EVT SrcVT = VecOp.getValueType();
EVT ResVT = SrcVT.getVectorElementType();
EVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert operands to Scalable.
AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
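// Lower reductions of i1 (predicate) vectors: OR and AND are lowered to a
// PTEST, while XOR is computed via the aarch64_sve_cntp intrinsic.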
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
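// Lower an integer or floating-point vector reduction to the corresponding
// predicated SVE reduction and extract lane 0 of the result.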
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element-sized result.
if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
return Res;
}
SDValue
AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT InVT = Op.getOperand(1).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
// Convert the mask to a predicate (NOTE: We don't need to worry about
// inactive lanes since VSELECT is safe when given undefined elements).
EVT MaskVT = Op.getOperand(0).getValueType();
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
assert(useSVEForFixedLengthVectorVT(InVT) &&
"Only expected to lower fixed length vector operation!");
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, Op.getOperand(2)});
EVT PromoteVT = ContainerVT.changeTypeToInteger();
auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
SDValue
AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto SrcOp = Op.getOperand(0);
EVT VT = Op.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT =
getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
return convertFromScalableVector(DAG, VT, Op);
}
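// Lower CONCAT_VECTORS of fixed-length vectors: more than two operands are
// concatenated pairwise, and a two-operand concat is lowered to SVE SPLICE.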
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
auto SrcOp1 = Op.getOperand(0);
auto SrcOp2 = Op.getOperand(1);
EVT VT = Op.getValueType();
EVT SrcVT = SrcOp1.getValueType();
if (NumOperands > 2) {
SmallVector<SDValue, 4> Ops;
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
for (unsigned I = 0; I < NumOperands; I += 2)
Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
Op->getOperand(I), Op->getOperand(I + 1)));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
}
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT ExtendVT = ContainerVT.changeVectorElementType(
SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
EVT RoundVT = ContainerSrcVT.changeVectorElementType(
VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
Op.getOperand(1), DAG.getUNDEF(RoundVT));
Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// Safe to use a larger than specified operand since we just unpacked the
// data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// Safe to use a larger than specified result since an fp_to_int where the
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
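// Lower a fixed-length VECTOR_SHUFFLE. Only EXT-style masks that shift by a
// single element are currently handled (via INSR); other shuffles return an
// empty SDValue.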
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
auto ShuffleMask = SVN->getMask();
SDLoc DL(Op);
SDValue Op1 = Op.getOperand(0);
SDValue Op2 = Op.getOperand(1);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
Imm == VT.getVectorNumElements() - 1) {
if (ReverseEXT)
std::swap(Op1, Op2);
EVT ScalarTy = VT.getVectorElementType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
SDValue Scalar = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
return convertFromScalableVector(DAG, VT, Op);
}
return SDValue();
}
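// Bitcast between two legal scalable vector types. Predicate types go
// straight through REINTERPRET_CAST; unpacked data layouts are first
// reinterpreted as their packed container types so that a plain BITCAST
// between same-sized types can be used.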
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
(void)TLI;
assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert((VT.getVectorElementType() == MVT::i1) ==
(InVT.getVectorElementType() == MVT::i1) &&
"Cannot cast between data and predicate scalable vector types!");
if (InVT == VT)
return Op;
if (VT.getVectorElementType() == MVT::i1)
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
// Unpack result if required.
if (VT != PackedVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
return Op;
}
bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
return ::isAllActivePredicate(N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
return ::getPromotedVTForPredicate(VT);
}
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
switch (Opc) {
case AArch64ISD::VSHL: {
// Match (VSHL (VLSHR Val X) X)
SDValue ShiftL = Op;
SDValue ShiftR = Op->getOperand(0);
if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
return false;
if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
return false;
unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
// Other cases can be handled as well, but this is not
// implemented.
if (ShiftRBits != ShiftLBits)
return false;
unsigned ScalarSize = Op.getScalarValueSizeInBits();
assert(ScalarSize > ShiftLBits && "Invalid shift imm");
APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
APInt UnusedBits = ~OriginalDemandedBits;
if ((ZeroBits & UnusedBits) != ZeroBits)
return false;
// All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 62089166f4b7..00fd374587bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1,792 +1,792 @@
//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs za_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-23} = 0b100000001;
let Inst{22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
}
class sme_outer_product_fp32<bit S, string mnemonic>
: sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
class sme_outer_product_fp64<bit S, string mnemonic>
: sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
string mnemonic>
: I<(outs za_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-25} = 0b1010000;
let Inst{24} = u0;
let Inst{23} = 0b1;
let Inst{22} = sz;
let Inst{21} = u1;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3} = 0b0;
}
class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32, ZPR8,
mnemonic> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
: sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64, ZPR16,
mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
: I<(outs TileOp32:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
"", []>,
Sched<[]> {
bits<5> Zm;
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
bits<2> ZAda;
let Inst{31-22} = 0b1000000110;
let Inst{21} = op;
let Inst{20-16} = Zm;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4} = S;
let Inst{3-2} = 0b00;
let Inst{1-0} = ZAda;
}
multiclass sme_bf16_outer_product<bit S, string mnemonic> {
def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
}
multiclass sme_f16_outer_product<bit S, string mnemonic> {
def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
}
//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
ZPRRegOp zpr_ty, string mnemonic>
: I<(outs tile_ty:$ZAda),
(ins PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
"", []>, Sched<[]> {
bits<3> Pm;
bits<3> Pn;
bits<5> Zn;
let Inst{31-23} = 0b110000001;
let Inst{22} = op;
let Inst{21-17} = 0b01000;
let Inst{16} = V;
let Inst{15-13} = Pm;
let Inst{12-10} = Pn;
let Inst{9-5} = Zn;
let Inst{4-3} = 0b00;
}
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
bits<2> ZAda;
let Inst{2} = 0b0;
let Inst{1-0} = ZAda;
}
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
: sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
bits<3> ZAda;
let Inst{2-0} = ZAda;
}
//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<5> Rm;
bits<2> Rv;
bits<3> Pg;
bits<5> Rn;
let Inst{31-25} = 0b1110000;
let Inst{24} = Q;
let Inst{23-22} = msz;
let Inst{21} = 0b0;
let Inst{20-16} = Rm;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let mayLoad = 1;
}
class sme_mem_ld_ss_inst_BHSD<bits<2> msz, string mnemonic,
MatrixTileVectorOperand tile_ty, bit is_col,
Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_ld_ss_base<
0b0, is_col, msz, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
class sme_mem_ld_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
bit is_col>
: sme_mem_ld_ss_base<
0b1, is_col, 0b11, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn,
GPR64shifted128:$Rm),
mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg/z, [$Rn, $Rm]">;
multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
MatrixTileVectorOperand tile_ty, Operand imm_ty,
RegisterOperand gpr_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
// Default XZR offset aliases
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
multiclass sme_mem_ss_aliases_Q<string mnemonic, Instruction inst,
MatrixTileVectorOperand tile_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, GPR64shifted128:$Rm), 0>;
// Default XZR offset aliases
def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv]\\}, $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 2>;
def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
string pg_suffix=""> {
defm : sme_mem_ss_aliases_BHSD<mnemonic # "b", !cast<Instruction>(inst # _B),
!if(is_col, TileVectorOpV8, TileVectorOpH8),
imm0_15, GPR64shifted8, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "h", !cast<Instruction>(inst # _H),
!if(is_col, TileVectorOpV16, TileVectorOpH16),
imm0_7, GPR64shifted16, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "w", !cast<Instruction>(inst # _S),
!if(is_col, TileVectorOpV32, TileVectorOpH32),
imm0_3, GPR64shifted32, pg_suffix>;
defm : sme_mem_ss_aliases_BHSD<mnemonic # "d", !cast<Instruction>(inst # _D),
!if(is_col, TileVectorOpV64, TileVectorOpH64),
imm0_1, GPR64shifted64, pg_suffix>;
defm : sme_mem_ss_aliases_Q <mnemonic # "q", !cast<Instruction>(inst # _Q),
!if(is_col, TileVectorOpV128, TileVectorOpH128),
pg_suffix>;
}
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_ld_ss_inst_BHSD<0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_mem_ld_ss_inst_BHSD<0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
def _S : sme_mem_ld_ss_inst_BHSD<0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
def _D : sme_mem_ld_ss_inst_BHSD<0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
def _Q : sme_mem_ld_ss_inst_Q<mnemonic # "q",
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_ld_ss_aliases<NAME, is_col>;
}
multiclass sme_mem_ld_ss<string mnemonic> {
defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}
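The _B/_H/_S/_D/_Q variants above share the low four instruction bits between the tile number (ZAt) and the slice immediate, trading immediate range for tile bits as the element size grows (_B: 4 immediate bits and no tile bits, _H: 1+3, _S: 2+2, _D: 3+1, _Q: 4 tile bits and no immediate). A minimal C++ sketch of that packing, for illustration only; the helper name and signature are invented and are not part of the TableGen or backend sources:
// Illustrative helper, not from the sources above: packs tile and slice into
// the shared low nibble the way sme_mem_ld_v_ss/_st_v_ss encode them.
static unsigned packTileAndSlice(unsigned TileBits, unsigned Tile,
                                 unsigned Slice) {
  return ((Tile << (4 - TileBits)) | Slice) & 0xF;
}
// e.g. packTileAndSlice(2, /*ZA2.S*/ 2, /*slice*/ 1) == 0b1001, matching
// Inst{3-2} = ZAt and Inst{1-0} = imm in the _S definition above.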
//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
string mnemonic, string argstr>
: I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<5> Rm;
bits<2> Rv;
bits<3> Pg;
bits<5> Rn;
let Inst{31-25} = 0b1110000;
let Inst{24} = Q;
let Inst{23-22} = msz;
let Inst{21} = 0b1;
let Inst{20-16} = Rm;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let mayStore = 1;
let hasSideEffects = 1;
}
class sme_mem_st_ss_inst_BHSD<bits<2> msz, string mnemonic,
MatrixTileVectorOperand tile_ty, bit is_col,
Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_st_ss_base<
0b0, is_col, msz,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
GPR64sp:$Rn, gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
class sme_mem_st_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
bit is_col>
: sme_mem_st_ss_base<
0b1, is_col, 0b11,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg,
GPR64sp:$Rn, GPR64shifted128:$Rm),
mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg, [$Rn, $Rm]">;
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
def _B : sme_mem_st_ss_inst_BHSD<0b00, mnemonic # "b",
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_mem_st_ss_inst_BHSD<0b01, mnemonic # "h",
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
def _S : sme_mem_st_ss_inst_BHSD<0b10, mnemonic # "w",
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
def _D : sme_mem_st_ss_inst_BHSD<0b11, mnemonic # "d",
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
def _Q : sme_mem_st_ss_inst_Q<mnemonic # "q",
!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
defm : sme_mem_st_ss_aliases<NAME, is_col>;
}
multiclass sme_mem_st_ss<string mnemonic> {
defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//
class sme_spill_fill_inst<bit isStore, dag outs, dag ins, string opcodestr>
: I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
[]>,
Sched<[]> {
bits<2> Rv;
bits<5> Rn;
bits<4> imm4;
let Inst{31-22} = 0b1110000100;
let Inst{21} = isStore;
let Inst{20-15} = 0b000000;
let Inst{14-13} = Rv;
let Inst{12-10} = 0b000;
let Inst{9-5} = Rn;
let Inst{4} = 0b0;
let Inst{3-0} = imm4;
let mayLoad = !not(isStore);
let mayStore = isStore;
}
multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
def NAME : sme_spill_fill_inst<isStore, outs, ins, opcodestr>;
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
multiclass sme_spill<string opcodestr> {
defm NAME : sme_spill_fill<0b1, (outs),
(ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
imm0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
multiclass sme_fill<string opcodestr> {
defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv,
imm0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<2> Rv;
bits<3> Pg;
bits<5> Zn;
let Inst{31-24} = 0b11000000;
let Inst{23-22} = sz;
let Inst{21-17} = 0b00000;
let Inst{16} = Q;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
}
class sme_vector_to_tile_inst<bits<2> sz, MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
string mnemonic>
: sme_vector_to_tile_base<0b0, is_col, sz, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
class sme_vector_to_tile_inst_Q<MatrixTileVectorOperand tile_ty,
bit is_col, string mnemonic>
: sme_vector_to_tile_base<0b1, is_col, 0b11, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, ZPR128:$Zn),
mnemonic, "\t$ZAd[$Rv], $Pg/m, $Zn">;
multiclass sme_vector_to_tile_aliases<Instruction inst,
MatrixTileVectorOperand tile_ty,
ZPRRegOp zpr_ty, Operand imm_ty> {
def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
- (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
+ (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
def _B : sme_vector_to_tile_inst<0b00, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, ZPR8, mnemonic> {
bits<4> imm;
let Inst{3-0} = imm;
}
def _H : sme_vector_to_tile_inst<0b01, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, ZPR16, mnemonic> {
bits<1> ZAd;
bits<3> imm;
let Inst{3} = ZAd;
let Inst{2-0} = imm;
}
def _S : sme_vector_to_tile_inst<0b10, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, ZPR32, mnemonic> {
bits<2> ZAd;
bits<2> imm;
let Inst{3-2} = ZAd;
let Inst{1-0} = imm;
}
def _D : sme_vector_to_tile_inst<0b11, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, ZPR64, mnemonic> {
bits<3> ZAd;
bits<1> imm;
let Inst{3-1} = ZAd;
let Inst{0} = imm;
}
def _Q : sme_vector_to_tile_inst_Q<!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, mnemonic> {
bits<4> ZAd;
bits<1> imm;
let Inst{3-0} = ZAd;
}
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
ZPR8, imm0_15>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
ZPR16, imm0_7>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
ZPR32, imm0_3>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
ZPR64, imm0_1>;
def : InstAlias<"mov\t$ZAd[$Rv], $Pg/m, $Zn",
(!cast<Instruction>(NAME # _Q) !if(is_col,
TileVectorOpV128,
TileVectorOpH128):$ZAd,
MatrixIndexGPR32Op12_15:$Rv,
PPR3bAny:$Pg, ZPR128:$Zn), 1>;
}
multiclass sme_vector_to_tile<string mnemonic> {
defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
string mnemonic, string argstr>
: I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
bits<2> Rv;
bits<3> Pg;
bits<5> Zd;
let Inst{31-24} = 0b11000000;
let Inst{23-22} = sz;
let Inst{21-17} = 0b00001;
let Inst{16} = Q;
let Inst{15} = V;
let Inst{14-13} = Rv;
let Inst{12-10} = Pg;
let Inst{9} = 0b0;
let Inst{4-0} = Zd;
}
class sme_tile_to_vector_inst<bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
: sme_tile_to_vector_base<0b0, is_col, sz, (outs zpr_ty:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
class sme_tile_to_vector_inst_Q<MatrixTileVectorOperand tile_ty,
bit is_col, string mnemonic>
: sme_tile_to_vector_base<0b1, is_col, 0b11, (outs ZPR128:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv]">;
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
Operand imm_ty > {
def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
(inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
def _B : sme_tile_to_vector_inst<0b00, ZPR8, !if(is_col, TileVectorOpV8,
TileVectorOpH8),
is_col, imm0_15, mnemonic> {
bits<4> imm;
let Inst{8-5} = imm;
}
def _H : sme_tile_to_vector_inst<0b01, ZPR16, !if(is_col, TileVectorOpV16,
TileVectorOpH16),
is_col, imm0_7, mnemonic> {
bits<1> ZAn;
bits<3> imm;
let Inst{8} = ZAn;
let Inst{7-5} = imm;
}
def _S : sme_tile_to_vector_inst<0b10, ZPR32, !if(is_col, TileVectorOpV32,
TileVectorOpH32),
is_col, imm0_3, mnemonic> {
bits<2> ZAn;
bits<2> imm;
let Inst{8-7} = ZAn;
let Inst{6-5} = imm;
}
def _D : sme_tile_to_vector_inst<0b11, ZPR64, !if(is_col, TileVectorOpV64,
TileVectorOpH64),
is_col, imm0_1, mnemonic> {
bits<3> ZAn;
bits<1> imm;
let Inst{8-6} = ZAn;
let Inst{5} = imm;
}
def _Q : sme_tile_to_vector_inst_Q<!if(is_col, TileVectorOpV128,
TileVectorOpH128),
is_col, mnemonic> {
bits<4> ZAn;
let Inst{8-5} = ZAn;
}
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
!if(is_col, TileVectorOpV8,
TileVectorOpH8), imm0_15>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
!if(is_col, TileVectorOpV16,
TileVectorOpH16), imm0_7>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
!if(is_col, TileVectorOpV32,
TileVectorOpH32), imm0_3>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
!if(is_col, TileVectorOpV64,
TileVectorOpH64), imm0_1>;
def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv]",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, PPR3bAny:$Pg,
!if(is_col,
TileVectorOpV128,
TileVectorOpH128):$ZAn,
MatrixIndexGPR32Op12_15:$Rv), 1>;
}
multiclass sme_tile_to_vector<string mnemonic> {
defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}
//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//
class sme_zero_inst<string mnemonic>
: I<(outs MatrixTileList:$imm), (ins),
mnemonic, "\t$imm", "", []>, Sched<[]> {
bits<8> imm;
let Inst{31-8} = 0b110000000000100000000000;
let Inst{7-0} = imm;
}
multiclass sme_zero<string mnemonic> {
def NAME : sme_zero_inst<mnemonic>;
def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
}
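The alias immediates above follow a simple pattern: the whole-array alias sets all eight mask bits, each 16-bit tile ZAn.H owns every other bit starting at n, and each 32-bit tile ZAn.S owns bits n and n+4, so the multi-tile aliases are just bitwise ORs of the single-tile masks. A small illustrative helper (hypothetical, not part of this file) that reproduces the .s masks listed above:
// Illustrative only: 8-bit ZERO tile mask for a 32-bit tile ZAn.S, matching
// the aliases above (za0.s -> 0b00010001, ..., za3.s -> 0b10001000). Masks
// for tile lists are the bitwise OR of the individual tile masks.
static unsigned char zeroMaskForSTile(unsigned N) { // N in 0..3
  return static_cast<unsigned char>((1u << N) | (1u << (N + 4)));
}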
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//
class sve2_int_perm_revd<string asm>
: I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
asm, "\t$Zd, $Pg/m, $Zn", "", []>,
Sched<[]> {
bits<5> Zd;
bits<3> Pg;
bits<5> Zn;
let Inst{31-24} = 0b00000101;
let Inst{23-22} = 0b00; // size
let Inst{21-13} = 0b101110100;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnary;
let ElementSize = ZPR128.ElementSize;
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
: I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;
bits<5> Zn;
bits<5> Zd;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-11} = 0b11000;
let Inst{10} = U;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zpr_ty.ElementSize;
}
multiclass sve2_clamp<string asm, bit U> {
def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
class sve2_int_perm_dup_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
: I<(outs ppr_ty:$Pd), (ins PPRAny:$Pg, ppr_ty:$Pn,
MatrixIndexGPR32Op12_15:$Rm, imm_ty:$imm),
asm, "\t$Pd, $Pg/z, $Pn[$Rm, $imm]", "", []>,
Sched<[]> {
bits<2> Rm;
bits<4> Pg;
bits<4> Pn;
bits<4> Pd;
let Inst{31-24} = 0b00100101;
let Inst{21} = 0b1;
let Inst{17-16} = Rm;
let Inst{15-14} = 0b01;
let Inst{13-10} = Pg;
let Inst{9} = 0b0;
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
}
multiclass sve2_int_perm_dup_p<string asm> {
def _B : sve2_int_perm_dup_p<asm, PPR8, imm0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
let Inst{20-19} = imm{1-0};
let Inst{18} = 0b1;
}
def _H : sve2_int_perm_dup_p<asm, PPR16, imm0_7> {
bits<3> imm;
let Inst{23-22} = imm{2-1};
let Inst{20} = imm{0};
let Inst{19-18} = 0b10;
}
def _S : sve2_int_perm_dup_p<asm, PPR32, imm0_3> {
bits<2> imm;
let Inst{23-22} = imm{1-0};
let Inst{20-18} = 0b100;
}
def _D : sve2_int_perm_dup_p<asm, PPR64, imm0_1> {
bits<1> imm;
let Inst{23} = imm;
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _B) PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _H) PPR16:$Pd, PPRAny:$Pg, PPR16:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _S) PPR32:$Pd, PPRAny:$Pg, PPR32:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
(!cast<Instruction>(NAME # _D) PPR64:$Pd, PPRAny:$Pg, PPR64:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index d8465f6d682b..94126e179462 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -1,861 +1,861 @@
//===---- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "M68kInstrInfo.h"
#include "M68kRegisterInfo.h"
#include "TargetInfo/M68kTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
#include <sstream>
#define DEBUG_TYPE "m68k-asm-parser"
using namespace llvm;
static cl::opt<bool> RegisterPrefixOptional(
"m68k-register-prefix-optional", cl::Hidden,
cl::desc("Enable specifying registers without the % prefix"),
cl::init(false));
namespace {
/// Parses M68k assembly from a stream.
class M68kAsmParser : public MCTargetAsmParser {
const MCSubtargetInfo &STI;
MCAsmParser &Parser;
const MCRegisterInfo *MRI;
#define GET_ASSEMBLER_HEADER
#include "M68kGenAsmMatcher.inc"
// Helpers for Match&Emit.
bool invalidOperand(const SMLoc &Loc, const OperandVector &Operands,
const uint64_t &ErrorInfo);
bool missingFeature(const SMLoc &Loc, const uint64_t &ErrorInfo);
bool emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const;
bool parseRegisterName(unsigned int &RegNo, SMLoc Loc,
StringRef RegisterName);
OperandMatchResultTy parseRegister(unsigned int &RegNo);
// Parser functions.
void eatComma();
bool isExpr();
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseMemOp(OperandVector &Operands);
public:
M68kAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
MCAsmParserExtension::Initialize(Parser);
MRI = getContext().getRegisterInfo();
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
};
struct M68kMemOp {
enum class Kind {
Addr,
Reg,
RegIndirect,
RegPostIncrement,
RegPreDecrement,
RegIndirectDisplacement,
RegIndirectDisplacementIndex,
};
// These variables are used for the following forms:
// Addr: (OuterDisp)
// Reg: %OuterReg
// RegIndirect: (%OuterReg)
// RegPostIncrement: (%OuterReg)+
// RegPreDecrement: -(%OuterReg)
// RegIndirectDisplacement: OuterDisp(%OuterReg)
// RegIndirectDisplacementIndex:
// OuterDisp(%OuterReg, %InnerReg.Size * Scale, InnerDisp)
Kind Op;
unsigned OuterReg;
unsigned InnerReg;
const MCExpr *OuterDisp;
const MCExpr *InnerDisp;
uint8_t Size : 4;
uint8_t Scale : 4;
const MCExpr *Expr;
M68kMemOp() {}
M68kMemOp(Kind Op) : Op(Op) {}
void print(raw_ostream &OS) const;
};
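As a concrete illustration of the forms listed in the comment above (values chosen arbitrarily; this snippet is not part of the parser), a register-indirect-with-displacement operand such as -8(%a1) would populate the struct roughly like this:
// Illustrative only: building the operand for "-8(%a1)". Ctx stands in for
// the MCContext the parser obtains via Parser.getContext().
static M68kMemOp makeDispOperandExample(MCContext &Ctx) {
  M68kMemOp DispOp(M68kMemOp::Kind::RegIndirectDisplacement);
  DispOp.OuterReg = M68k::A1;
  DispOp.OuterDisp = MCConstantExpr::create(-8, Ctx);
  return DispOp;
}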
/// A parsed M68k assembly operand.

class M68kOperand : public MCParsedAsmOperand {
typedef MCParsedAsmOperand Base;
- enum class Kind {
+ enum class KindTy {
Invalid,
Token,
Imm,
MemOp,
};
- Kind Kind;
+ KindTy Kind;
SMLoc Start, End;
union {
StringRef Token;
int64_t Imm;
const MCExpr *Expr;
M68kMemOp MemOp;
};
public:
- M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End)
+ M68kOperand(KindTy Kind, SMLoc Start, SMLoc End)
: Base(), Kind(Kind), Start(Start), End(End) {}
SMLoc getStartLoc() const override { return Start; }
SMLoc getEndLoc() const override { return End; }
void print(raw_ostream &OS) const override;
bool isMem() const override { return false; }
- bool isMemOp() const { return Kind == Kind::MemOp; }
+ bool isMemOp() const { return Kind == KindTy::MemOp; }
static void addExpr(MCInst &Inst, const MCExpr *Expr);
// Reg
bool isReg() const override;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
static std::unique_ptr<M68kOperand> createMemOp(M68kMemOp MemOp, SMLoc Start,
SMLoc End);
// Token
bool isToken() const override;
StringRef getToken() const;
static std::unique_ptr<M68kOperand> createToken(StringRef Token, SMLoc Start,
SMLoc End);
// Imm
bool isImm() const override;
void addImmOperands(MCInst &Inst, unsigned N) const;
static std::unique_ptr<M68kOperand> createImm(const MCExpr *Expr, SMLoc Start,
SMLoc End);
// Addr
bool isAddr() const;
void addAddrOperands(MCInst &Inst, unsigned N) const;
// ARI
bool isARI() const;
void addARIOperands(MCInst &Inst, unsigned N) const;
// ARID
bool isARID() const;
void addARIDOperands(MCInst &Inst, unsigned N) const;
// ARII
bool isARII() const;
void addARIIOperands(MCInst &Inst, unsigned N) const;
// ARIPD
bool isARIPD() const;
void addARIPDOperands(MCInst &Inst, unsigned N) const;
// ARIPI
bool isARIPI() const;
void addARIPIOperands(MCInst &Inst, unsigned N) const;
// PCD
bool isPCD() const;
void addPCDOperands(MCInst &Inst, unsigned N) const;
// PCI
bool isPCI() const;
void addPCIOperands(MCInst &Inst, unsigned N) const;
};
} // end anonymous namespace.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kAsmParser() {
RegisterMCAsmParser<M68kAsmParser> X(getTheM68kTarget());
}
#define GET_MATCHER_IMPLEMENTATION
#include "M68kGenAsmMatcher.inc"
void M68kMemOp::print(raw_ostream &OS) const {
switch (Op) {
case Kind::Addr:
OS << OuterDisp;
break;
case Kind::Reg:
OS << '%' << OuterReg;
break;
case Kind::RegIndirect:
OS << "(%" << OuterReg << ')';
break;
case Kind::RegPostIncrement:
OS << "(%" << OuterReg << ")+";
break;
case Kind::RegPreDecrement:
OS << "-(%" << OuterReg << ")";
break;
case Kind::RegIndirectDisplacement:
OS << OuterDisp << "(%" << OuterReg << ")";
break;
case Kind::RegIndirectDisplacementIndex:
OS << OuterDisp << "(%" << OuterReg << ", " << InnerReg << "." << Size
<< ", " << InnerDisp << ")";
break;
}
}
void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) {
if (auto Const = dyn_cast<MCConstantExpr>(Expr)) {
Inst.addOperand(MCOperand::createImm(Const->getValue()));
return;
}
Inst.addOperand(MCOperand::createExpr(Expr));
}
// Reg
bool M68kOperand::isReg() const {
- return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
+ return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
}
unsigned M68kOperand::getReg() const {
assert(isReg());
return MemOp.OuterReg;
}
void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const {
assert(isReg() && "wrong operand kind");
assert((N == 1) && "can only handle one register operand");
Inst.addOperand(MCOperand::createReg(getReg()));
}
std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End);
Op->MemOp = MemOp;
return Op;
}
// Token
-bool M68kOperand::isToken() const { return Kind == Kind::Token; }
+bool M68kOperand::isToken() const { return Kind == KindTy::Token; }
StringRef M68kOperand::getToken() const {
assert(isToken());
return Token;
}
std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End);
Op->Token = Token;
return Op;
}
// Imm
-bool M68kOperand::isImm() const { return Kind == Kind::Imm; }
+bool M68kOperand::isImm() const { return Kind == KindTy::Imm; }
void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
assert(isImm() && "wrong operand kind");
assert((N == 1) && "can only handle one immediate operand");
M68kOperand::addExpr(Inst, Expr);
}
std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End);
Op->Expr = Expr;
return Op;
}
// Addr
bool M68kOperand::isAddr() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::Addr;
}
void M68kOperand::addAddrOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
// ARI
bool M68kOperand::isARI() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirect &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARID
bool M68kOperand::isARID() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIDOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARII
bool M68kOperand::isARII() const {
return isMemOp() &&
MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacementIndex &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIIOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
Inst.addOperand(MCOperand::createReg(MemOp.InnerReg));
}
// ARIPD
bool M68kOperand::isARIPD() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegPreDecrement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIPDOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// ARIPI
bool M68kOperand::isARIPI() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegPostIncrement &&
M68k::AR32RegClass.contains(MemOp.OuterReg);
}
void M68kOperand::addARIPIOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(MemOp.OuterReg));
}
// PCD
bool M68kOperand::isPCD() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacement &&
MemOp.OuterReg == M68k::PC;
}
void M68kOperand::addPCDOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
// PCI
bool M68kOperand::isPCI() const {
return isMemOp() &&
MemOp.Op == M68kMemOp::Kind::RegIndirectDisplacementIndex &&
MemOp.OuterReg == M68k::PC;
}
void M68kOperand::addPCIOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
Inst.addOperand(MCOperand::createReg(MemOp.InnerReg));
}
static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
bool SP) {
switch (RegNo) {
case M68k::A0:
case M68k::A1:
case M68k::A2:
case M68k::A3:
case M68k::A4:
case M68k::A5:
case M68k::A6:
return Address;
case M68k::SP:
return SP;
case M68k::D0:
case M68k::D1:
case M68k::D2:
case M68k::D3:
case M68k::D4:
case M68k::D5:
case M68k::D6:
case M68k::D7:
return Data;
case M68k::SR:
case M68k::CCR:
return false;
default:
llvm_unreachable("unexpected register type");
return false;
}
}
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
switch (Kind) {
case MCK_XR16:
case MCK_SPILL:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), true, true, true)) {
return Match_Success;
}
break;
case MCK_AR16:
case MCK_AR32:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), false, true, true)) {
return Match_Success;
}
break;
case MCK_AR32_NOSP:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), false, true, false)) {
return Match_Success;
}
break;
case MCK_DR8:
case MCK_DR16:
case MCK_DR32:
if (Operand.isReg() &&
checkRegisterClass(Operand.getReg(), true, false, false)) {
return Match_Success;
}
break;
case MCK_AR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::A0) || (Operand.getReg() == M68k::A1))) {
return Match_Success;
}
break;
case MCK_DR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::D0) || (Operand.getReg() == M68k::D1))) {
return Match_Success;
}
break;
case MCK_XR16_TC:
if (Operand.isReg() &&
((Operand.getReg() == M68k::D0) || (Operand.getReg() == M68k::D1) ||
(Operand.getReg() == M68k::A0) || (Operand.getReg() == M68k::A1))) {
return Match_Success;
}
break;
}
return Match_InvalidOperand;
}
bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
StringRef RegisterName) {
auto RegisterNameLower = RegisterName.lower();
// CCR register
if (RegisterNameLower == "ccr") {
RegNo = M68k::CCR;
return true;
}
// Parse simple general-purpose registers.
if (RegisterNameLower.size() == 2) {
static unsigned RegistersByIndex[] = {
M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
M68k::A4, M68k::A5, M68k::A6, M68k::SP,
};
switch (RegisterNameLower[0]) {
case 'd':
case 'a': {
if (isdigit(RegisterNameLower[1])) {
unsigned IndexOffset = (RegisterNameLower[0] == 'a') ? 8 : 0;
unsigned RegIndex = (unsigned)(RegisterNameLower[1] - '0');
if (RegIndex < 8) {
RegNo = RegistersByIndex[IndexOffset + RegIndex];
return true;
}
}
break;
}
case 's':
if (RegisterNameLower[1] == 'p') {
RegNo = M68k::SP;
return true;
} else if (RegisterNameLower[1] == 'r') {
RegNo = M68k::SR;
return true;
}
break;
case 'p':
if (RegisterNameLower[1] == 'c') {
RegNo = M68k::PC;
return true;
}
break;
}
}
return false;
}
OperandMatchResultTy M68kAsmParser::parseRegister(unsigned &RegNo) {
bool HasPercent = false;
AsmToken PercentToken;
LLVM_DEBUG(dbgs() << "parseRegister "; getTok().dump(dbgs()); dbgs() << "\n");
if (getTok().is(AsmToken::Percent)) {
HasPercent = true;
PercentToken = Lex();
} else if (!RegisterPrefixOptional.getValue()) {
return MatchOperand_NoMatch;
}
if (!Parser.getTok().is(AsmToken::Identifier)) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
return MatchOperand_NoMatch;
}
auto RegisterName = Parser.getTok().getString();
if (!parseRegisterName(RegNo, Parser.getLexer().getLoc(), RegisterName)) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
return MatchOperand_NoMatch;
}
Parser.Lex();
return MatchOperand_Success;
}
bool M68kAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
auto Result = tryParseRegister(RegNo, StartLoc, EndLoc);
if (Result != MatchOperand_Success) {
return Error(StartLoc, "expected register");
}
return false;
}
OperandMatchResultTy M68kAsmParser::tryParseRegister(unsigned &RegNo,
SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = getLexer().getLoc();
auto Result = parseRegister(RegNo);
EndLoc = getLexer().getLoc();
return Result;
}
bool M68kAsmParser::isExpr() {
switch (Parser.getTok().getKind()) {
case AsmToken::Identifier:
case AsmToken::Integer:
return true;
case AsmToken::Minus:
return getLexer().peekTok().getKind() == AsmToken::Integer;
default:
return false;
}
}
OperandMatchResultTy M68kAsmParser::parseImm(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::Hash)) {
return MatchOperand_NoMatch;
}
SMLoc Start = getLexer().getLoc();
Parser.Lex();
SMLoc End;
const MCExpr *Expr;
if (getParser().parseExpression(Expr, End)) {
return MatchOperand_ParseFail;
}
Operands.push_back(M68kOperand::createImm(Expr, Start, End));
return MatchOperand_Success;
}
OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
bool IsPD = false;
M68kMemOp MemOp;
// Check for a plain register.
auto Result = parseRegister(MemOp.OuterReg);
if (Result == MatchOperand_Success) {
MemOp.Op = M68kMemOp::Kind::Reg;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
}
if (Result == MatchOperand_ParseFail) {
return Result;
}
// Check for pre-decrement & outer displacement.
bool HasDisplacement = false;
if (getLexer().is(AsmToken::Minus)) {
IsPD = true;
Parser.Lex();
} else if (isExpr()) {
if (Parser.parseExpression(MemOp.OuterDisp)) {
return MatchOperand_ParseFail;
}
HasDisplacement = true;
}
if (getLexer().isNot(AsmToken::LParen)) {
if (HasDisplacement) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
} else if (IsPD) {
Error(getLexer().getLoc(), "expected (");
return MatchOperand_ParseFail;
}
return MatchOperand_NoMatch;
}
Parser.Lex();
// Check for constant dereference & MIT-style displacement
if (!HasDisplacement && isExpr()) {
if (Parser.parseExpression(MemOp.OuterDisp)) {
return MatchOperand_ParseFail;
}
HasDisplacement = true;
// If we're not followed by a comma, we're a constant dereference.
if (getLexer().isNot(AsmToken::Comma)) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
return MatchOperand_Success;
}
Parser.Lex();
}
Result = parseRegister(MemOp.OuterReg);
if (Result == MatchOperand_ParseFail) {
return MatchOperand_ParseFail;
}
if (Result != MatchOperand_Success) {
Error(getLexer().getLoc(), "expected register");
return MatchOperand_ParseFail;
}
// Check for Index.
bool HasIndex = false;
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
Result = parseRegister(MemOp.InnerReg);
if (Result == MatchOperand_ParseFail) {
return Result;
}
if (Result == MatchOperand_NoMatch) {
Error(getLexer().getLoc(), "expected register");
return MatchOperand_ParseFail;
}
// TODO: parse size, scale and inner displacement.
MemOp.Size = 4;
MemOp.Scale = 1;
MemOp.InnerDisp = MCConstantExpr::create(0, Parser.getContext(), true, 4);
HasIndex = true;
}
if (Parser.getTok().isNot(AsmToken::RParen)) {
Error(getLexer().getLoc(), "expected )");
return MatchOperand_ParseFail;
}
Parser.Lex();
bool IsPI = false;
if (!IsPD && Parser.getTok().is(AsmToken::Plus)) {
Parser.Lex();
IsPI = true;
}
SMLoc End = getLexer().getLoc();
unsigned OpCount = IsPD + IsPI + (HasIndex || HasDisplacement);
if (OpCount > 1) {
Error(Start, "only one of post-increment, pre-decrement or displacement "
"can be used");
return MatchOperand_ParseFail;
}
if (IsPD) {
MemOp.Op = M68kMemOp::Kind::RegPreDecrement;
} else if (IsPI) {
MemOp.Op = M68kMemOp::Kind::RegPostIncrement;
} else if (HasIndex) {
MemOp.Op = M68kMemOp::Kind::RegIndirectDisplacementIndex;
} else if (HasDisplacement) {
MemOp.Op = M68kMemOp::Kind::RegIndirectDisplacement;
} else {
MemOp.Op = M68kMemOp::Kind::RegIndirect;
}
Operands.push_back(M68kOperand::createMemOp(MemOp, Start, End));
return MatchOperand_Success;
}
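To summarize the control flow above, here is an illustrative (not exhaustive, and not part of the source) mapping from operand spellings accepted by parseMemOp to the M68kMemOp::Kind it records:
// Illustrative summary of parseMemOp above (not part of the source):
//   "%d0"            -> Kind::Reg
//   "(%a0)"          -> Kind::RegIndirect
//   "(%a0)+"         -> Kind::RegPostIncrement
//   "-(%a0)"         -> Kind::RegPreDecrement
//   "12(%a0)"        -> Kind::RegIndirectDisplacement
//   "12(%a0,%d1)"    -> Kind::RegIndirectDisplacementIndex (size, scale and
//                       inner displacement currently defaulted, see the TODO)
//   "(1234)", "1234" -> Kind::Addr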
void M68kAsmParser::eatComma() {
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
}
}
bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
Operands.push_back(M68kOperand::createToken(Name, Start, Start));
bool First = true;
while (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
if (!First) {
eatComma();
} else {
First = false;
}
auto MatchResult = MatchOperandParserImpl(Operands, Name);
if (MatchResult == MatchOperand_Success) {
continue;
}
// Add custom operand formats here...
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token parsing operands");
}
// Eat EndOfStatement.
Parser.Lex();
return false;
}
bool M68kAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
bool M68kAsmParser::invalidOperand(SMLoc const &Loc,
OperandVector const &Operands,
uint64_t const &ErrorInfo) {
SMLoc ErrorLoc = Loc;
char const *Diag = nullptr;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size()) {
Diag = "too few operands for instruction.";
} else {
auto const &Op = (M68kOperand const &)*Operands[ErrorInfo];
if (Op.getStartLoc() != SMLoc()) {
ErrorLoc = Op.getStartLoc();
}
}
}
if (!Diag) {
Diag = "invalid operand for instruction";
}
return Error(ErrorLoc, Diag);
}
bool M68kAsmParser::missingFeature(llvm::SMLoc const &Loc,
uint64_t const &ErrorInfo) {
return Error(Loc, "instruction requires a CPU feature not currently enabled");
}
bool M68kAsmParser::emit(MCInst &Inst, SMLoc const &Loc,
MCStreamer &Out) const {
Inst.setLoc(Loc);
Out.emitInstruction(Inst, STI);
return false;
}
bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success:
return emit(Inst, Loc, Out);
case Match_MissingFeature:
return missingFeature(Loc, ErrorInfo);
case Match_InvalidOperand:
return invalidOperand(Loc, Operands, ErrorInfo);
case Match_MnemonicFail:
return Error(Loc, "invalid instruction");
default:
return true;
}
}
void M68kOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case Kind::Invalid:
+ case KindTy::Invalid:
OS << "invalid";
break;
- case Kind::Token:
+ case KindTy::Token:
OS << "token '" << Token << "'";
break;
- case Kind::Imm:
+ case KindTy::Imm:
OS << "immediate " << Imm;
break;
- case Kind::MemOp:
+ case KindTy::MemOp:
MemOp.print(OS);
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 5b8fd3d41b14..cb7d8f8b25e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -1,188 +1,192 @@
//===-- M68kTargetMachine.cpp - M68k target machine ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementation for M68k target machine.
///
//===----------------------------------------------------------------------===//
#include "M68kTargetMachine.h"
#include "M68k.h"
#include "M68kSubtarget.h"
#include "M68kTargetObjectFile.h"
#include "TargetInfo/M68kTargetInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/TargetRegistry.h"
#include <memory>
using namespace llvm;
#define DEBUG_TYPE "m68k"
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kTarget() {
RegisterTargetMachine<M68kTargetMachine> X(getTheM68kTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
}
namespace {
std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
std::string Ret = "";
// M68k is Big Endian
Ret += "E";
// FIXME how to wire it with the used object format?
Ret += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Ret += "-p:32:32";
-
- // M68k requires i8 to align on 2 byte boundry
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs.
+ // The ABI only specifies 16-bit alignment.
+ // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+ // to having 32-bit alignment.
+ Ret += "-p:32:16:32";
+
+ // Bytes do not require special alignment, words are word aligned and
+ // long words are word aligned at minimum.
Ret += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
// The registers can hold 8, 16, 32 bits
Ret += "-n8:16:32";
Ret += "-a:0:16-S16";
return Ret;
}
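For reference, the pieces appended above concatenate to the following layout string; the constant below is purely illustrative and its name is invented, not part of the target sources:
// Illustrative only: the data layout string computeDataLayout now produces.
static constexpr const char M68kDataLayoutAfterThisChange[] =
    "E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32-n8:16:32-a:0:16-S16";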
Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// If not defined, we default to static relocation.
if (!RM.hasValue()) {
return Reloc::Static;
}
return *RM;
}
CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
bool JIT) {
if (!CM) {
return CodeModel::Small;
} else if (CM == CodeModel::Large) {
llvm_unreachable("Large code model is not supported");
} else if (CM == CodeModel::Kernel) {
llvm_unreachable("Kernel code model is not implemented yet");
}
return CM.getValue();
}
} // end anonymous namespace
M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Optional<Reloc::Model> RM,
Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS,
Options, getEffectiveRelocModel(TT, RM),
::getEffectiveCodeModel(CM, JIT), OL),
TLOF(std::make_unique<M68kELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
M68kTargetMachine::~M68kTargetMachine() {}
const M68kSubtarget *
M68kTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
auto CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
auto FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
I = std::make_unique<M68kSubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
namespace {
class M68kPassConfig : public TargetPassConfig {
public:
M68kPassConfig(M68kTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
M68kTargetMachine &getM68kTargetMachine() const {
return getTM<M68kTargetMachine>();
}
const M68kSubtarget &getM68kSubtarget() const {
return *getM68kTargetMachine().getSubtargetImpl();
}
bool addIRTranslator() override;
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
} // namespace
TargetPassConfig *M68kTargetMachine::createPassConfig(PassManagerBase &PM) {
return new M68kPassConfig(*this, PM);
}
bool M68kPassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createM68kISelDag(getM68kTargetMachine()));
addPass(createM68kGlobalBaseRegPass());
return false;
}
bool M68kPassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
bool M68kPassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
bool M68kPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
bool M68kPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
}
void M68kPassConfig::addPreSched2() { addPass(createM68kExpandPseudoPass()); }
void M68kPassConfig::addPreEmitPass() {
addPass(createM68kCollapseMOVEMPass());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a541daaff9f4..207101763ac2 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,1617 +1,1617 @@
//===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISCV implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"
#define GET_INSTRINFO_CTOR_DTOR
#include "RISCVGenInstrInfo.inc"
namespace llvm {
namespace RISCVVPseudosTable {
using namespace RISCV;
#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVVPseudosTable
} // namespace llvm
RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
: RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
STI(STI) {}
MCInst RISCVInstrInfo::getNop() const {
if (STI.getFeatureBits()[RISCV::FeatureStdExtC])
return MCInstBuilder(RISCV::C_NOP);
return MCInstBuilder(RISCV::ADDI)
.addReg(RISCV::X0)
.addReg(RISCV::X0)
.addImm(0);
}
unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::LB:
case RISCV::LBU:
case RISCV::LH:
case RISCV::LHU:
case RISCV::FLH:
case RISCV::LW:
case RISCV::FLW:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLD:
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::FSH:
case RISCV::FSW:
case RISCV::SD:
case RISCV::FSD:
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
// We really want the positive remainder mod 32 here, which happens to be
// easily obtainable with a mask.
return ((DstReg - SrcReg) & 0x1f) < NumRegs;
}
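Two concrete cases for the predicate above (illustrative only): with register encodings compared modulo 32, a destination that starts inside the source group forces the tuple copy in copyPhysReg below to be emitted in reverse order.
// Illustrative only: the forward-copy overlap check on concrete encodings.
// Copying a 4-register group v8..v11 to v10..v13 overlaps, so copy backwards:
static_assert(((10u - 8u) & 0x1f) < 4u, "overlapping tuple, reverse the copy");
// Copying v8..v11 to v16..v19 is disjoint, so a forward copy is fine:
static_assert(!(((16u - 8u) & 0x1f) < 4u), "disjoint tuple, forward copy ok");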
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
MCRegister SrcReg, bool KillSrc) const {
if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0);
return;
}
// FPR->FPR copies and VR->VR copies.
unsigned Opc;
bool IsScalableVector = true;
unsigned NF = 1;
unsigned LMul = 1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_H;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_S;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_D;
IsScalableVector = false;
} else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
} else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
} else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
} else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV8R_V;
} else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 2;
LMul = 1;
} else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 2;
LMul = 2;
} else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
NF = 2;
LMul = 4;
} else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 3;
LMul = 1;
} else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 3;
LMul = 2;
} else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 4;
LMul = 1;
} else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 4;
LMul = 2;
} else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 5;
LMul = 1;
} else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 6;
LMul = 1;
} else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 7;
LMul = 1;
} else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 8;
LMul = 1;
} else {
llvm_unreachable("Impossible reg-to-reg copy");
}
if (IsScalableVector) {
if (NF == 1) {
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
I = NF - 1;
End = -1;
Incr = -1;
}
for (; I != End; I += Incr) {
BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
getKillRegState(KillSrc));
}
}
} else {
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M1;
IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M2;
IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M4;
IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M8;
IsZvlsseg = false;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL8_M1;
else
llvm_unreachable("Can't store this register to stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DstReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M1;
IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M2;
IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M4;
IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M8;
IsZvlsseg = false;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD8_M1;
else
llvm_unreachable("Can't load this register from stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
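// movImm materializes the constant Val into DstReg by expanding the
// instruction sequence returned by RISCVMatInt::generateInstSeq. Intermediate
// results are written to a scratch virtual register; only the final
// instruction in the sequence writes DstReg, and only the first one reads X0.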
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag) const {
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register SrcReg = RISCV::X0;
Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
unsigned Num = 0;
if (!STI.is64Bit() && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
assert(!Seq.empty());
for (RISCVMatInt::Inst &Inst : Seq) {
// Write the final result to DstReg if it's the last instruction in the Seq.
// Otherwise, write the result to the temp register.
if (++Num == Seq.size())
Result = DstReg;
if (Inst.Opc == RISCV::LUI) {
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
} else if (Inst.Opc == RISCV::ADDUW) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
.addImm(Inst.Imm)
.setMIFlag(Flag);
}
// Only the first instruction has X0 as its source.
SrcReg = Result;
}
}
// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
// push BranchOpcode, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
assert(LastInst.getDesc().isConditionalBranch() &&
"Unknown conditional branch");
Target = LastInst.getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
Cond.push_back(LastInst.getOperand(0));
Cond.push_back(LastInst.getOperand(1));
}
static unsigned getOppositeBranchOpcode(int Opc) {
switch (Opc) {
default:
llvm_unreachable("Unrecognized conditional branch");
case RISCV::BEQ:
return RISCV::BNE;
case RISCV::BNE:
return RISCV::BEQ;
case RISCV::BLT:
return RISCV::BGE;
case RISCV::BGE:
return RISCV::BLT;
case RISCV::BLTU:
return RISCV::BGEU;
case RISCV::BGEU:
return RISCV::BLTU;
}
}
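// analyzeBranch recognises three terminator shapes: a single unconditional
// branch, a single conditional branch, and a conditional branch followed by
// an unconditional one. Indirect branches and blocks with more than two
// terminators are reported as not analyzable. With AllowModify set, any
// terminators after the first unconditional or indirect branch are erased.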
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = FBB = nullptr;
Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end() || !isUnpredicatedTerminator(*I))
return false;
// Count the number of terminators and find the first unconditional or
// indirect branch.
MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
int NumTerminators = 0;
for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
J++) {
NumTerminators++;
if (J->getDesc().isUnconditionalBranch() ||
J->getDesc().isIndirectBranch()) {
FirstUncondOrIndirectBr = J.getReverse();
}
}
// If AllowModify is true, we can erase any terminators after
// FirstUncondOrIndirectBR.
if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
std::next(FirstUncondOrIndirectBr)->eraseFromParent();
NumTerminators--;
}
I = FirstUncondOrIndirectBr;
}
// We can't handle blocks that end in an indirect branch.
if (I->getDesc().isIndirectBranch())
return true;
// We can't handle blocks with more than 2 terminators.
if (NumTerminators > 2)
return true;
// Handle a single unconditional branch.
if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
TBB = getBranchDestBlock(*I);
return false;
}
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
return false;
}
// Handle a conditional branch followed by an unconditional branch.
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
return false;
}
// Otherwise, we can't handle this.
return true;
}
unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
if (BytesRemoved)
*BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!I->getDesc().isUnconditionalBranch() &&
!I->getDesc().isConditionalBranch())
return 0;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin())
return 1;
--I;
if (!I->getDesc().isConditionalBranch())
return 1;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
return 2;
}
// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
if (BytesAdded)
*BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
"RISCV branch conditions have two components!");
// Unconditional branch.
if (Cond.empty()) {
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 1;
}
// Either a one or two-way conditional branch.
unsigned Opc = Cond[0].getImm();
MachineInstr &CondMI =
*BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(CondMI);
// One-way conditional branch.
if (!FBB)
return 1;
// Two-way conditional branch.
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 2;
}
unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineBasicBlock &DestBB,
const DebugLoc &DL,
int64_t BrOffset,
RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
if (!isInt<32>(BrOffset))
report_fatal_error(
"Branch offsets outside of the signed 32-bit range not supported");
// FIXME: A virtual register must be used initially, as the register
// scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
// uses the same workaround).
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto II = MBB.end();
MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
.addReg(ScratchReg, RegState::Define | RegState::Dead)
.addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
MI.getIterator(), false, 0);
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
return 8;
}
bool RISCVInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid branch condition!");
Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm()));
return false;
}
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
assert(MI.getDesc().isBranch() && "Unexpected opcode!");
// The branch target is always the last operand.
int NumOp = MI.getNumExplicitOperands();
return MI.getOperand(NumOp - 1).getMBB();
}
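// Conditional branches encode a 12-bit immediate scaled by two, giving a
// 13-bit signed byte offset; JAL and PseudoBR encode a 20-bit immediate
// scaled by two (21-bit offset); PseudoJump expands to an AUIPC/JALR pair,
// so its reach is 32 bits once the +0x800 hi/lo rounding is accounted for.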
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
unsigned XLen = STI.getXLen();
// Ideally we could determine the supported branch offset from the
// RISCVII::FormMask, but this can't be used for Pseudo instructions like
// PseudoBR.
switch (BranchOp) {
default:
llvm_unreachable("Unexpected opcode!");
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
case RISCV::BGE:
case RISCV::BLTU:
case RISCV::BGEU:
return isIntN(13, BrOffset);
case RISCV::JAL:
case RISCV::PseudoBR:
return isIntN(21, BrOffset);
case RISCV::PseudoJump:
return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
}
}
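// getInstSizeInBytes reports the size used by branch relaxation and the
// outliner: compressible instructions count as 2 bytes, meta instructions as
// 0, and the pseudos below use the size of their eventual expansion.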
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default: {
if (MI.getParent() && MI.getParent()->getParent()) {
const auto MF = MI.getMF();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
if (isCompressibleInst(MI, &ST, MRI, STI))
return 2;
}
return get(Opcode).getSize();
}
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
// These values are determined by RISCVExpandAtomicPseudoInsts,
// RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the
// pseudos are expanded.
case RISCV::PseudoCALLReg:
case RISCV::PseudoCALL:
case RISCV::PseudoJump:
case RISCV::PseudoTAIL:
case RISCV::PseudoLLA:
case RISCV::PseudoLA:
case RISCV::PseudoLA_TLS_IE:
case RISCV::PseudoLA_TLS_GD:
return 8;
case RISCV::PseudoAtomicLoadNand32:
case RISCV::PseudoAtomicLoadNand64:
return 20;
case RISCV::PseudoMaskedAtomicSwap32:
case RISCV::PseudoMaskedAtomicLoadAdd32:
case RISCV::PseudoMaskedAtomicLoadSub32:
return 28;
case RISCV::PseudoMaskedAtomicLoadNand32:
return 32;
case RISCV::PseudoMaskedAtomicLoadMax32:
case RISCV::PseudoMaskedAtomicLoadMin32:
return 44;
case RISCV::PseudoMaskedAtomicLoadUMax32:
case RISCV::PseudoMaskedAtomicLoadUMin32:
return 36;
case RISCV::PseudoCmpXchg32:
case RISCV::PseudoCmpXchg64:
return 16;
case RISCV::PseudoMaskedCmpXchg32:
return 32;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVSPILL2_M2:
case RISCV::PseudoVSPILL2_M4:
case RISCV::PseudoVSPILL3_M1:
case RISCV::PseudoVSPILL3_M2:
case RISCV::PseudoVSPILL4_M1:
case RISCV::PseudoVSPILL4_M2:
case RISCV::PseudoVSPILL5_M1:
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVSPILL8_M1:
case RISCV::PseudoVRELOAD2_M1:
case RISCV::PseudoVRELOAD2_M2:
case RISCV::PseudoVRELOAD2_M4:
case RISCV::PseudoVRELOAD3_M1:
case RISCV::PseudoVRELOAD3_M2:
case RISCV::PseudoVRELOAD4_M1:
case RISCV::PseudoVRELOAD4_M2:
case RISCV::PseudoVRELOAD5_M1:
case RISCV::PseudoVRELOAD6_M1:
case RISCV::PseudoVRELOAD7_M1:
case RISCV::PseudoVRELOAD8_M1: {
// The values are determined by expandVSPILL and expandVRELOAD, which
// expand these pseudos depending on NF.
unsigned NF = isRVVSpillForZvlsseg(Opcode)->first;
return 4 * (2 * NF - 1);
}
}
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
// The canonical floating-point move is fsgnj rd, rs, rs.
return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
case RISCV::ADDI:
case RISCV::ORI:
case RISCV::XORI:
return (MI.getOperand(1).isReg() &&
MI.getOperand(1).getReg() == RISCV::X0) ||
(MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
}
return MI.isAsCheapAsAMove();
}
Optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
if (MI.isMoveReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
switch (MI.getOpcode()) {
default:
break;
case RISCV::ADDI:
// Operand 1 can be a frame index, but callers expect registers.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
// The canonical floating-point move is fsgnj rd, rs, rs.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
}
return None;
}
bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
const MCInstrInfo *MCII = STI.getInstrInfo();
MCInstrDesc const &Desc = MCII->get(MI.getOpcode());
for (auto &OI : enumerate(Desc.operands())) {
unsigned OpType = OI.value().OperandType;
if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
const MachineOperand &MO = MI.getOperand(OI.index());
if (MO.isImm()) {
int64_t Imm = MO.getImm();
bool Ok;
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
case RISCVOp::OPERAND_UIMM4:
Ok = isUInt<4>(Imm);
break;
case RISCVOp::OPERAND_UIMM5:
Ok = isUInt<5>(Imm);
break;
case RISCVOp::OPERAND_UIMM12:
Ok = isUInt<12>(Imm);
break;
case RISCVOp::OPERAND_SIMM12:
Ok = isInt<12>(Imm);
break;
case RISCVOp::OPERAND_UIMM20:
Ok = isUInt<20>(Imm);
break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
if (STI.getTargetTriple().isArch64Bit())
Ok = isUInt<6>(Imm);
else
Ok = isUInt<5>(Imm);
break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
return false;
}
}
}
}
return true;
}
// Return true on success, setting the base operand, the byte offset, and the
// memory width of the instruction. Width is the size of memory that is being
// loaded/stored.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
// Here we assume the standard RISC-V ISA, which uses a base+offset
// addressing mode. You'll need to relax these conditions to support custom
// load/store instructions.
if (LdSt.getNumExplicitOperands() != 3)
return false;
if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
return false;
if (!LdSt.hasOneMemOperand())
return false;
Width = (*LdSt.memoperands_begin())->getSize();
BaseReg = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
return true;
}
bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base register, the offset from the base register, and the
// width. Width is the size of memory that is being loaded/stored (e.g. 1, 2,
// 4). If the base registers are identical and the offset of the lower memory
// access plus its width does not reach the offset of the higher memory
// access, then the two accesses are trivially disjoint.
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
return true;
}
}
return false;
}
std::pair<unsigned, unsigned>
RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace RISCVII;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_CALL, "riscv-call"},
{MO_PLT, "riscv-plt"},
{MO_LO, "riscv-lo"},
{MO_HI, "riscv-hi"},
{MO_PCREL_LO, "riscv-pcrel-lo"},
{MO_PCREL_HI, "riscv-pcrel-hi"},
{MO_GOT_HI, "riscv-got-hi"},
{MO_TPREL_LO, "riscv-tprel-lo"},
{MO_TPREL_HI, "riscv-tprel-hi"},
{MO_TPREL_ADD, "riscv-tprel-add"},
{MO_TLS_GOT_HI, "riscv-tls-got-hi"},
{MO_TLS_GD_HI, "riscv-tls-gd-hi"}};
return makeArrayRef(TargetFlags);
}
bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Can F be deduplicated by the linker? If it can, don't outline from it.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
return false;
// Don't outline from functions with section markings; the program could
// expect that all the code is in the named section.
if (F.hasSection())
return false;
// It's safe to outline from MF.
return true;
}
bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// More accurate safety checking is done in getOutliningCandidateInfo.
return true;
}
// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
MachineOutlinerDefault
};
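// Candidates that cannot use X5 (t0) to set up the call are filtered out
// first. The cost model below charges 8 bytes per call site for
// "call t0, <fn>" and 4 bytes of frame overhead for the "jr t0" return,
// reduced to 2 bytes when the C extension is enabled.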
outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
// First we need to filter out candidates where the X5 register (i.e. t0)
// can't be used to set up the function call.
auto CannotInsertCall = [](outliner::Candidate &C) {
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
C.initLRU(*TRI);
LiveRegUnits LRU = C.LRU;
return !LRU.available(RISCV::X5);
};
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return outliner::OutlinedFunction();
unsigned SequenceSize = 0;
auto I = RepeatedSequenceLocs[0].front();
auto E = std::next(RepeatedSequenceLocs[0].back());
for (; I != E; ++I)
SequenceSize += getInstSizeInBytes(*I);
// call t0, function = 8 bytes.
unsigned CallOverhead = 8;
for (auto &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, CallOverhead);
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
unsigned FrameOverhead = 4;
if (RepeatedSequenceLocs[0].getMF()->getSubtarget()
.getFeatureBits()[RISCV::FeatureStdExtC])
FrameOverhead = 2;
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
FrameOverhead, MachineOutlinerDefault);
}
outliner::InstrType
RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
unsigned Flags) const {
MachineInstr &MI = *MBBI;
MachineBasicBlock *MBB = MI.getParent();
const TargetRegisterInfo *TRI =
MBB->getParent()->getSubtarget().getRegisterInfo();
// Positions generally can't safely be outlined.
if (MI.isPosition()) {
// We can manually strip out CFI instructions later.
if (MI.isCFIInstruction())
return outliner::InstrType::Invisible;
return outliner::InstrType::Illegal;
}
// Don't trust the user to write safe inline assembly.
if (MI.isInlineAsm())
return outliner::InstrType::Illegal;
// We can't outline branches to other basic blocks.
if (MI.isTerminator() && !MBB->succ_empty())
return outliner::InstrType::Illegal;
// We need support for tail calls to outlined functions before return
// statements can be allowed.
if (MI.isReturn())
return outliner::InstrType::Illegal;
// Don't allow modifying the X5 register which we use for return addresses for
// these outlined functions.
if (MI.modifiesRegister(RISCV::X5, TRI) ||
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
return outliner::InstrType::Illegal;
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands())
if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI())
return outliner::InstrType::Illegal;
// Don't allow instructions which won't be materialized to impact outlining
// analysis.
if (MI.isMetaInstruction())
return outliner::InstrType::Invisible;
return outliner::InstrType::Legal;
}
void RISCVInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
// Strip out any CFI instructions
bool Changed = true;
while (Changed) {
Changed = false;
auto I = MBB.begin();
auto E = MBB.end();
for (; I != E; ++I) {
if (I->isCFIInstruction()) {
I->removeFromParent();
Changed = true;
break;
}
}
}
MBB.addLiveIn(RISCV::X5);
// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
.addReg(RISCV::X5)
.addImm(0));
}
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, const outliner::Candidate &C) const {
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
.addGlobalAddress(M.getNamedValue(MF.getName()), 0,
RISCVII::MO_CALL));
return It;
}
// clang-format off
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL##_COMMUTABLE
#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS(OP, VF16): \
case CASE_VFMA_OPCODE_LMULS(OP, VF32): \
case CASE_VFMA_OPCODE_LMULS(OP, VF64)
// clang-format on
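// The CASE_VFMA_* macros above expand into case labels covering every LMUL
// variant of a commutable VFMA pseudo (and, for the splat forms, every
// floating-point element width), so the switches below only need to name
// each base operation once.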
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
return false;
switch (MI.getOpcode()) {
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
// For these instructions we can only swap operand 1 and operand 3 by
// changing the opcode.
unsigned CommutableOpIdx1 = 1;
unsigned CommutableOpIdx2 = 3;
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
return true;
}
case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
// For these instructions we have more freedom. We can commute with the
// other multiplicand or with the addend/subtrahend/minuend.
// Any fixed operand must be from source 1, 2 or 3.
if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
return false;
// If both ops are fixed, one must be the tied source.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
return false;
// Look for two different register operands assumed to be commutable
// regardless of the FMA opcode. The FMA opcode is adjusted later if
// needed.
if (SrcOpIdx1 == CommuteAnyOperandIndex ||
SrcOpIdx2 == CommuteAnyOperandIndex) {
// At least one of the operands to be commuted is not specified and
// this method is free to choose appropriate commutable operands.
unsigned CommutableOpIdx1 = SrcOpIdx1;
if (SrcOpIdx1 == SrcOpIdx2) {
// Neither operand is fixed. Set one of the commutable
// operands to the tied source.
CommutableOpIdx1 = 1;
- } else if (SrcOpIdx1 == CommutableOpIdx1) {
+ } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
// Only one of the operands is not fixed.
CommutableOpIdx1 = SrcOpIdx2;
}
// CommutableOpIdx1 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx2.
unsigned CommutableOpIdx2;
if (CommutableOpIdx1 != 1) {
// If we haven't already used the tied source, we must use it now.
CommutableOpIdx2 = 1;
} else {
Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
// The commuted operands should have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless. We use this as a hint to make our decision.
if (Op1Reg != MI.getOperand(2).getReg())
CommutableOpIdx2 = 2;
else
CommutableOpIdx2 = 3;
}
// Assign the found pair of commutable indices to SrcOpIdx1 and
// SrcOpIdx2 to return those values.
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
}
return true;
}
}
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_COMMUTABLE: \
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_COMMUTABLE; \
break;
#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF16) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF32) \
CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
if (NewMI)
return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
return MI;
};
switch (MI.getOpcode()) {
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
// It only makes sense to toggle these between clobbering the
// addend/subtrahend/minuend or one of the multiplicands.
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMACC, FMADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
// If one of the operands is the addend, we need to change the opcode.
// Otherwise we're just swapping two of the multiplicands.
if (OpIdx1 == 3 || OpIdx2 == 3) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_LMULS(FMADD, FMACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
// Let the default code handle it.
break;
}
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
#undef CASE_VFMA_CHANGE_OPCODE_LMULS
#undef CASE_VFMA_CHANGE_OPCODE_COMMON
#undef CASE_VFMA_SPLATS
#undef CASE_VFMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
RISCV::PseudoV##OP##_##LMUL##_TIED
#define CASE_WIDEOP_OPCODE_LMULS(OP) \
CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
// clang-format on
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
case RISCV::PseudoV##OP##_##LMUL##_TIED: \
NewOpc = RISCV::PseudoV##OP##_##LMUL; \
break;
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
MachineInstr *RISCVInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
switch (MI.getOpcode()) {
default:
break;
case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
// clang-format off
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
}
// clang-format on
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4));
MIB.copyImplicitOps(MI);
if (LV) {
unsigned NumOps = MI.getNumOperands();
for (unsigned I = 1; I < NumOps; ++I) {
MachineOperand &Op = MI.getOperand(I);
if (Op.isReg() && Op.isKill())
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
return MIB;
}
}
return nullptr;
}
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_OPCODE_COMMON
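// getVLENFactoredAmount emits code that computes (Amount / 8) * VLENB into a
// fresh virtual register. A power-of-two factor becomes a single shift,
// 2^N +/- 1 becomes a shift plus an add/sub, and anything else falls back to
// materializing the factor and using MUL, which requires the M extension.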
Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL,
int64_t Amount,
MachineInstr::MIFlag Flag) const {
assert(Amount > 0 && "There is no need to get VLEN scaled value.");
assert(Amount % 8 == 0 &&
"Reserve the stack by the multiple of one vector size.");
MachineRegisterInfo &MRI = MF.getRegInfo();
const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
int64_t NumOfVReg = Amount / 8;
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL)
.setMIFlag(Flag);
assert(isInt<32>(NumOfVReg) &&
"Expect the number of vector registers within 32-bits.");
if (isPowerOf2_32(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
return VL;
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), ScaledRegister)
.addReg(VL)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, TII->get(RISCV::SUB), VL)
.addReg(ScaledRegister, RegState::Kill)
.addReg(VL, RegState::Kill)
.setMIFlag(Flag);
} else {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
if (!isInt<12>(NumOfVReg))
movImm(MBB, II, DL, N, NumOfVReg);
else {
BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), N)
.addReg(RISCV::X0)
.addImm(NumOfVReg)
.setMIFlag(Flag);
}
if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtM())
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"M-extension must be enabled to calculate the vscaled size/offset."});
BuildMI(MBB, II, DL, TII->get(RISCV::MUL), VL)
.addReg(VL, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
}
return VL;
}
static bool isRVVWholeLoadStore(unsigned Opcode) {
switch (Opcode) {
default:
return false;
case RISCV::VS1R_V:
case RISCV::VS2R_V:
case RISCV::VS4R_V:
case RISCV::VS8R_V:
case RISCV::VL1RE8_V:
case RISCV::VL2RE8_V:
case RISCV::VL4RE8_V:
case RISCV::VL8RE8_V:
case RISCV::VL1RE16_V:
case RISCV::VL2RE16_V:
case RISCV::VL4RE16_V:
case RISCV::VL8RE16_V:
case RISCV::VL1RE32_V:
case RISCV::VL2RE32_V:
case RISCV::VL4RE32_V:
case RISCV::VL8RE32_V:
case RISCV::VL1RE64_V:
case RISCV::VL2RE64_V:
case RISCV::VL4RE64_V:
case RISCV::VL8RE64_V:
return true;
}
}
bool RISCVInstrInfo::isRVVSpill(const MachineInstr &MI, bool CheckFIs) const {
// RVV lacks any support for immediate addressing for stack addresses, so be
// conservative.
unsigned Opcode = MI.getOpcode();
if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
!isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
return false;
return !CheckFIs || any_of(MI.operands(), [](const MachineOperand &MO) {
return MO.isFI();
});
}
Optional<std::pair<unsigned, unsigned>>
RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
switch (Opcode) {
default:
return None;
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVRELOAD2_M1:
return std::make_pair(2u, 1u);
case RISCV::PseudoVSPILL2_M2:
case RISCV::PseudoVRELOAD2_M2:
return std::make_pair(2u, 2u);
case RISCV::PseudoVSPILL2_M4:
case RISCV::PseudoVRELOAD2_M4:
return std::make_pair(2u, 4u);
case RISCV::PseudoVSPILL3_M1:
case RISCV::PseudoVRELOAD3_M1:
return std::make_pair(3u, 1u);
case RISCV::PseudoVSPILL3_M2:
case RISCV::PseudoVRELOAD3_M2:
return std::make_pair(3u, 2u);
case RISCV::PseudoVSPILL4_M1:
case RISCV::PseudoVRELOAD4_M1:
return std::make_pair(4u, 1u);
case RISCV::PseudoVSPILL4_M2:
case RISCV::PseudoVRELOAD4_M2:
return std::make_pair(4u, 2u);
case RISCV::PseudoVSPILL5_M1:
case RISCV::PseudoVRELOAD5_M1:
return std::make_pair(5u, 1u);
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVRELOAD6_M1:
return std::make_pair(6u, 1u);
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVRELOAD7_M1:
return std::make_pair(7u, 1u);
case RISCV::PseudoVSPILL8_M1:
case RISCV::PseudoVRELOAD8_M1:
return std::make_pair(8u, 1u);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 171d59ae4c6b..ae5108b0cb0d 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1,1436 +1,1434 @@
//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific support for the FastISel
/// class. Some of the target-specific code is generated by tablegen in the file
/// WebAssemblyGenFastISel.inc, which is #included here.
///
/// TODO: kill flags
///
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "wasm-fastisel"
namespace {
class WebAssemblyFastISel final : public FastISel {
// All possible address modes.
class Address {
public:
using BaseKind = enum { RegBase, FrameIndexBase };
private:
BaseKind Kind = RegBase;
union {
unsigned Reg;
int FI;
} Base;
// Whether the base has been determined yet
bool IsBaseSet = false;
int64_t Offset = 0;
const GlobalValue *GV = nullptr;
public:
// Innocuous defaults for our address.
Address() { Base.Reg = 0; }
void setKind(BaseKind K) {
assert(!isSet() && "Can't change kind with non-zero base");
Kind = K;
}
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
assert(!IsBaseSet && "Base cannot be reset");
Base.Reg = Reg;
IsBaseSet = true;
}
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
assert(!IsBaseSet && "Base cannot be reset");
Base.FI = FI;
IsBaseSet = true;
}
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
void setOffset(int64_t NewOffset) {
assert(NewOffset >= 0 && "Offsets must be non-negative");
Offset = NewOffset;
}
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() const { return GV; }
bool isSet() const { return IsBaseSet; }
};
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
LLVMContext *Context;
private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
return VT.isSimple() ? VT.getSimpleVT().SimpleTy
: MVT::INVALID_SIMPLE_VALUE_TYPE;
}
MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) {
switch (VT) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
return MVT::i32;
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
return VT;
case MVT::funcref:
case MVT::externref:
if (Subtarget->hasReferenceTypes())
return VT;
break;
case MVT::f16:
return MVT::f32;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v2f64:
if (Subtarget->hasSIMD128())
return VT;
break;
default:
break;
}
return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
bool computeAddress(const Value *Obj, Address &Addr);
void materializeLoadStoreOperands(Address &Addr);
void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB,
MachineMemOperand *MMO);
unsigned maskI1Value(unsigned Reg, const Value *V);
- unsigned getRegForI1Value(const Value *V, bool &Not);
+ unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not);
unsigned zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned zeroExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned signExtend(unsigned Reg, const Value *V, MVT::SimpleValueType From,
MVT::SimpleValueType To);
unsigned getRegForUnsignedValue(const Value *V);
unsigned getRegForSignedValue(const Value *V);
unsigned getRegForPromotedValue(const Value *V, bool IsSigned);
unsigned notValue(unsigned Reg);
unsigned copyValue(unsigned Reg);
// Backend specific FastISel code.
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
unsigned fastMaterializeConstant(const Constant *C) override;
bool fastLowerArguments() override;
// Selection routines.
bool selectCall(const Instruction *I);
bool selectSelect(const Instruction *I);
bool selectTrunc(const Instruction *I);
bool selectZExt(const Instruction *I);
bool selectSExt(const Instruction *I);
bool selectICmp(const Instruction *I);
bool selectFCmp(const Instruction *I);
bool selectBitCast(const Instruction *I);
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBr(const Instruction *I);
bool selectRet(const Instruction *I);
bool selectUnreachable(const Instruction *I);
public:
// Backend specific FastISel code.
WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo)
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
Context = &FuncInfo.Fn->getContext();
}
bool fastSelectInstruction(const Instruction *I) override;
#include "WebAssemblyGenFastISel.inc"
};
} // end anonymous namespace
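// computeAddress tries to express Obj as a wasm-style address: a register or
// frame-index base plus a non-negative constant offset, optionally anchored
// on a global value. It folds through bitcasts, no-op int<->ptr casts,
// inbounds GEPs, and adds/subs of constants, and gives up otherwise.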
bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const auto *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const auto *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
if (const auto *GV = dyn_cast<GlobalValue>(Obj)) {
if (TLI.isPositionIndependent())
return false;
if (Addr.getGlobalValue())
return false;
if (GV->isThreadLocal())
return false;
Addr.setGlobalValue(GV);
return true;
}
switch (Opcode) {
default:
break;
case Instruction::BitCast: {
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Non-inbounds geps can wrap; wasm's offsets can't.
if (!cast<GEPOperator>(U)->isInBounds())
goto unsupported_gep;
// Iterate through the GEP folding the constants into offsets where
// we can.
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) {
// An unscaled add of a register. Set it as the new base.
unsigned Reg = getRegForValue(Op);
if (Reg == 0)
return false;
Addr.setReg(Reg);
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
auto *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Don't fold in negative offsets.
if (int64_t(TmpOffset) >= 0) {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (computeAddress(U->getOperand(0), Addr))
return true;
}
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
break;
}
case Instruction::Alloca: {
const auto *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
if (Addr.isSet()) {
return false;
}
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
case Instruction::Add: {
// Adds of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue();
if (int64_t(TmpOffset) >= 0) {
Addr.setOffset(TmpOffset);
return computeAddress(LHS, Addr);
}
}
Address Backup = Addr;
if (computeAddress(LHS, Addr) && computeAddress(RHS, Addr))
return true;
Addr = Backup;
break;
}
case Instruction::Sub: {
// Subs of constants are common and easy enough.
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue();
if (TmpOffset >= 0) {
Addr.setOffset(TmpOffset);
return computeAddress(LHS, Addr);
}
}
break;
}
}
if (Addr.isSet()) {
return false;
}
unsigned Reg = getRegForValue(Obj);
if (Reg == 0)
return false;
Addr.setReg(Reg);
return Addr.getReg() != 0;
}
void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
if (Addr.isRegBase()) {
unsigned Reg = Addr.getReg();
if (Reg == 0) {
Reg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
: WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg)
.addImm(0);
Addr.setReg(Reg);
}
}
}
void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr,
const MachineInstrBuilder &MIB,
MachineMemOperand *MMO) {
// Set the alignment operand (this is rewritten in SetP2AlignOperands).
// TODO: Disable SetP2AlignOperands for FastISel and just do it here.
MIB.addImm(0);
if (const GlobalValue *GV = Addr.getGlobalValue())
MIB.addGlobalAddress(GV, Addr.getOffset());
else
MIB.addImm(Addr.getOffset());
if (Addr.isRegBase())
MIB.addReg(Addr.getReg());
else
MIB.addFrameIndex(Addr.getFI());
MIB.addMemOperand(MMO);
}
unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
return zeroExtendToI32(Reg, V, MVT::i1);
}
-unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
+unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
+ const BasicBlock *BB,
+ bool &Not) {
if (const auto *ICmp = dyn_cast<ICmpInst>(V))
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
- if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
+ if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) &&
+ ICmp->getParent() == BB) {
Not = ICmp->isTrueWhenEqual();
return getRegForValue(ICmp->getOperand(0));
}
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
- Not = true;
- return getRegForValue(NotV);
- }
-
Not = false;
unsigned Reg = getRegForValue(V);
if (Reg == 0)
return 0;
return maskI1Value(Reg, V);
}
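// zeroExtendToI32 masks the value down to its original width by ANDing with
// (1 << bits) - 1. i1 values that are already known to be zero-extended
// (e.g. zeroext arguments) and full i32 values are simply copied.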
unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
if (Reg == 0)
return 0;
switch (From) {
case MVT::i1:
// If the value is naturally an i1, we don't need to mask it. We only know
// if a value is naturally an i1 if it is definitely lowered by FastISel,
// not a DAG ISel fallback.
if (V != nullptr && isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())
return copyValue(Reg);
break;
case MVT::i8:
case MVT::i16:
break;
case MVT::i32:
return copyValue(Reg);
default:
return 0;
}
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
.addReg(Reg)
.addReg(Imm);
return Result;
}
unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
if (Reg == 0)
return 0;
switch (From) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
break;
case MVT::i32:
return copyValue(Reg);
default:
return 0;
}
unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(32 - MVT(From).getSizeInBits());
unsigned Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
.addReg(Reg)
.addReg(Imm);
unsigned Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
.addReg(Left)
.addReg(Imm);
return Right;
}
unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V,
MVT::SimpleValueType From,
MVT::SimpleValueType To) {
if (To == MVT::i64) {
if (From == MVT::i64)
return copyValue(Reg);
Reg = zeroExtendToI32(Reg, V, From);
unsigned Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_U_I32), Result)
.addReg(Reg);
return Result;
}
if (To == MVT::i32)
return zeroExtendToI32(Reg, V, From);
return 0;
}
unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
MVT::SimpleValueType From,
MVT::SimpleValueType To) {
if (To == MVT::i64) {
if (From == MVT::i64)
return copyValue(Reg);
Reg = signExtendToI32(Reg, V, From);
unsigned Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_S_I32), Result)
.addReg(Reg);
return Result;
}
if (To == MVT::i32)
return signExtendToI32(Reg, V, From);
return 0;
}
unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
unsigned VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return zeroExtend(VReg, V, From, To);
}
unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
unsigned VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return signExtend(VReg, V, From, To);
}
unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
bool IsSigned) {
return IsSigned ? getRegForSignedValue(V) : getRegForUnsignedValue(V);
}
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
assert(MRI.getRegClass(Reg) == &WebAssembly::I32RegClass);
unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
.addReg(Reg);
return NotReg;
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
ResultReg)
.addReg(Reg);
return ResultReg;
}
unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc =
Subtarget->hasAddr64() ? WebAssembly::COPY_I64 : WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
}
return 0;
}
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
if (TLI.isPositionIndependent())
return 0;
if (GV->isThreadLocal())
return 0;
unsigned ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
: WebAssembly::CONST_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addGlobalAddress(GV);
return ResultReg;
}
// Let target-independent code handle it.
return 0;
}
bool WebAssemblyFastISel::fastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
if (FuncInfo.Fn->getCallingConv() == CallingConv::Swift)
return false;
unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
return false;
if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy())
return false;
unsigned Opc;
const TargetRegisterClass *RC;
switch (getSimpleType(ArgTy)) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
Opc = WebAssembly::ARGUMENT_i32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = WebAssembly::ARGUMENT_i64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = WebAssembly::ARGUMENT_f32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = WebAssembly::ARGUMENT_f64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::v16i8:
Opc = WebAssembly::ARGUMENT_v16i8;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v8i16:
Opc = WebAssembly::ARGUMENT_v8i16;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v4i32:
Opc = WebAssembly::ARGUMENT_v4i32;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v2i64:
Opc = WebAssembly::ARGUMENT_v2i64;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v4f32:
Opc = WebAssembly::ARGUMENT_v4f32;
RC = &WebAssembly::V128RegClass;
break;
case MVT::v2f64:
Opc = WebAssembly::ARGUMENT_v2f64;
RC = &WebAssembly::V128RegClass;
break;
case MVT::funcref:
Opc = WebAssembly::ARGUMENT_funcref;
RC = &WebAssembly::FUNCREFRegClass;
break;
case MVT::externref:
Opc = WebAssembly::ARGUMENT_externref;
RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(I);
updateValueMap(&Arg, ResultReg);
++I;
}
MRI.addLiveIn(WebAssembly::ARGUMENTS);
auto *MFI = MF->getInfo<WebAssemblyFunctionInfo>();
for (auto const &Arg : F->args()) {
MVT::SimpleValueType ArgTy = getLegalType(getSimpleType(Arg.getType()));
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
}
MFI->addParam(ArgTy);
}
if (!F->getReturnType()->isVoidTy()) {
MVT::SimpleValueType RetTy =
getLegalType(getSimpleType(F->getReturnType()));
if (RetTy == MVT::INVALID_SIMPLE_VALUE_TYPE) {
MFI->clearParamsAndResults();
return false;
}
MFI->addResult(RetTy);
}
return true;
}
bool WebAssemblyFastISel::selectCall(const Instruction *I) {
const auto *Call = cast<CallInst>(I);
// TODO: Support tail calls in FastISel
if (Call->isMustTailCall() || Call->isInlineAsm() ||
Call->getFunctionType()->isVarArg())
return false;
Function *Func = Call->getCalledFunction();
if (Func && Func->isIntrinsic())
return false;
if (Call->getCallingConv() == CallingConv::Swift)
return false;
bool IsDirect = Func != nullptr;
if (!IsDirect && isa<ConstantExpr>(Call->getCalledOperand()))
return false;
FunctionType *FuncTy = Call->getFunctionType();
unsigned Opc = IsDirect ? WebAssembly::CALL : WebAssembly::CALL_INDIRECT;
bool IsVoid = FuncTy->getReturnType()->isVoidTy();
unsigned ResultReg;
if (!IsVoid) {
if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy())
return false;
MVT::SimpleValueType RetTy = getSimpleType(Call->getType());
switch (RetTy) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
ResultReg = createResultReg(&WebAssembly::I32RegClass);
break;
case MVT::i64:
ResultReg = createResultReg(&WebAssembly::I64RegClass);
break;
case MVT::f32:
ResultReg = createResultReg(&WebAssembly::F32RegClass);
break;
case MVT::f64:
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v8i16:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4i32:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2i64:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v4f32:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::v2f64:
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
case MVT::funcref:
ResultReg = createResultReg(&WebAssembly::FUNCREFRegClass);
break;
case MVT::externref:
ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass);
break;
default:
return false;
}
}
SmallVector<unsigned, 8> Args;
for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
Value *V = Call->getArgOperand(I);
MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
const AttributeList &Attrs = Call->getAttributes();
if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
unsigned Reg;
if (Attrs.hasParamAttribute(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
if (Reg == 0)
return false;
Args.push_back(Reg);
}
unsigned CalleeReg = 0;
if (!IsDirect) {
CalleeReg = getRegForValue(Call->getCalledOperand());
if (!CalleeReg)
return false;
}
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
if (!IsVoid)
MIB.addReg(ResultReg, RegState::Define);
if (IsDirect) {
MIB.addGlobalAddress(Func);
} else {
// Placeholder for the type index.
MIB.addImm(0);
// The table into which this call_indirect indexes.
MCSymbolWasm *Table = WebAssembly::getOrCreateFunctionTableSymbol(
MF->getMMI().getContext(), Subtarget);
if (Subtarget->hasReferenceTypes()) {
MIB.addSym(Table);
} else {
// Otherwise for the MVP there is at most one table whose number is 0, but
// we can't write a table symbol or issue relocations. Instead we just
// ensure the table is live.
Table->setNoStrip();
MIB.addImm(0);
}
// See if we must truncate the function pointer.
// CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
// as 64-bit for uniformity with other pointer types.
// See also: WebAssemblyISelLowering.cpp: LowerCallResults
if (Subtarget->hasAddr64()) {
auto Wrap = BuildMI(*FuncInfo.MBB, std::prev(FuncInfo.InsertPt), DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64));
unsigned Reg32 = createResultReg(&WebAssembly::I32RegClass);
Wrap.addReg(Reg32, RegState::Define);
Wrap.addReg(CalleeReg);
CalleeReg = Reg32;
}
}
for (unsigned ArgReg : Args)
MIB.addReg(ArgReg);
if (!IsDirect)
MIB.addReg(CalleeReg);
if (!IsVoid)
updateValueMap(Call, ResultReg);
return true;
}
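// Note on the wasm64 path above: function pointers are modeled as i64 for
// uniformity with other pointer types, but CALL_INDIRECT consumes an i32
// operand, hence the I32_WRAP_I64 emitted just before the call. In scalar
// terms the wrap is a plain truncation (a sketch, not code from this file):
//
//   uint64_t Callee64 = /* materialized 64-bit callee value */ 0;
//   uint32_t Callee32 = static_cast<uint32_t>(Callee64); // i32.wrap_i64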
bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const auto *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg =
+ getRegForI1Value(Select->getCondition(), I->getParent(), Not);
if (CondReg == 0)
return false;
unsigned TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
unsigned FalseReg = getRegForValue(Select->getFalseValue());
if (FalseReg == 0)
return false;
if (Not)
std::swap(TrueReg, FalseReg);
unsigned Opc;
const TargetRegisterClass *RC;
switch (getSimpleType(Select->getType())) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
Opc = WebAssembly::SELECT_I32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = WebAssembly::SELECT_I64;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = WebAssembly::SELECT_F32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = WebAssembly::SELECT_F64;
RC = &WebAssembly::F64RegClass;
break;
case MVT::funcref:
Opc = WebAssembly::SELECT_FUNCREF;
RC = &WebAssembly::FUNCREFRegClass;
break;
case MVT::externref:
Opc = WebAssembly::SELECT_EXTERNREF;
RC = &WebAssembly::EXTERNREFRegClass;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(TrueReg)
.addReg(FalseReg)
.addReg(CondReg);
updateValueMap(Select, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
const auto *Trunc = cast<TruncInst>(I);
unsigned Reg = getRegForValue(Trunc->getOperand(0));
if (Reg == 0)
return false;
if (Trunc->getOperand(0)->getType()->isIntegerTy(64)) {
unsigned Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64), Result)
.addReg(Reg);
Reg = Result;
}
updateValueMap(Trunc, Reg);
return true;
}
bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
const auto *ZExt = cast<ZExtInst>(I);
const Value *Op = ZExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType()));
unsigned In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = zeroExtend(In, Op, From, To);
if (Reg == 0)
return false;
updateValueMap(ZExt, Reg);
return true;
}
bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
const auto *SExt = cast<SExtInst>(I);
const Value *Op = SExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType()));
unsigned In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = signExtend(In, Op, From, To);
if (Reg == 0)
return false;
updateValueMap(SExt, Reg);
return true;
}
bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
const auto *ICmp = cast<ICmpInst>(I);
bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64;
unsigned Opc;
bool IsSigned = false;
switch (ICmp->getPredicate()) {
case ICmpInst::ICMP_EQ:
Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64;
break;
case ICmpInst::ICMP_NE:
Opc = I32 ? WebAssembly::NE_I32 : WebAssembly::NE_I64;
break;
case ICmpInst::ICMP_UGT:
Opc = I32 ? WebAssembly::GT_U_I32 : WebAssembly::GT_U_I64;
break;
case ICmpInst::ICMP_UGE:
Opc = I32 ? WebAssembly::GE_U_I32 : WebAssembly::GE_U_I64;
break;
case ICmpInst::ICMP_ULT:
Opc = I32 ? WebAssembly::LT_U_I32 : WebAssembly::LT_U_I64;
break;
case ICmpInst::ICMP_ULE:
Opc = I32 ? WebAssembly::LE_U_I32 : WebAssembly::LE_U_I64;
break;
case ICmpInst::ICMP_SGT:
Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SGE:
Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SLT:
Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64;
IsSigned = true;
break;
case ICmpInst::ICMP_SLE:
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
IsSigned = true;
break;
default:
return false;
}
unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), IsSigned);
if (LHS == 0)
return false;
unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), IsSigned);
if (RHS == 0)
return false;
unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
updateValueMap(ICmp, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
const auto *FCmp = cast<FCmpInst>(I);
unsigned LHS = getRegForValue(FCmp->getOperand(0));
if (LHS == 0)
return false;
unsigned RHS = getRegForValue(FCmp->getOperand(1));
if (RHS == 0)
return false;
bool F32 = getSimpleType(FCmp->getOperand(0)->getType()) != MVT::f64;
unsigned Opc;
bool Not = false;
switch (FCmp->getPredicate()) {
case FCmpInst::FCMP_OEQ:
Opc = F32 ? WebAssembly::EQ_F32 : WebAssembly::EQ_F64;
break;
case FCmpInst::FCMP_UNE:
Opc = F32 ? WebAssembly::NE_F32 : WebAssembly::NE_F64;
break;
case FCmpInst::FCMP_OGT:
Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64;
break;
case FCmpInst::FCMP_OGE:
Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64;
break;
case FCmpInst::FCMP_OLT:
Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64;
break;
case FCmpInst::FCMP_OLE:
Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64;
break;
case FCmpInst::FCMP_UGT:
Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64;
Not = true;
break;
case FCmpInst::FCMP_UGE:
Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64;
Not = true;
break;
case FCmpInst::FCMP_ULT:
Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64;
Not = true;
break;
case FCmpInst::FCMP_ULE:
Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64;
Not = true;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
if (Not)
ResultReg = notValue(ResultReg);
updateValueMap(FCmp, ResultReg);
return true;
}
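// A note on the unordered predicates handled above: FCMP_UGT/UGE/ULT/ULE have
// no direct opcode in this switch, so each is lowered as the EQZ-based
// negation (notValue) of the opposite ordered comparison. A minimal scalar
// sketch of the identity being used (illustrative, not code from this file):
//
//   bool fcmpUGT(double A, double B) {
//     return !(A <= B); // true when A > B or when either operand is NaN
//   }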
bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
// Target-independent code can handle this, except it doesn't set the dead
// flag on the ARGUMENTS clobber, so we have to do that manually in order
// to satisfy code that expects this of isBitcast() instructions.
EVT VT = TLI.getValueType(DL, I->getOperand(0)->getType());
EVT RetVT = TLI.getValueType(DL, I->getType());
if (!VT.isSimple() || !RetVT.isSimple())
return false;
unsigned In = getRegForValue(I->getOperand(0));
if (In == 0)
return false;
if (VT == RetVT) {
// No-op bitcast.
updateValueMap(I, In);
return true;
}
Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(),
In);
if (!Reg)
return false;
MachineBasicBlock::iterator Iter = FuncInfo.InsertPt;
--Iter;
assert(Iter->isBitcast());
Iter->setPhysRegsDeadExcept(ArrayRef<Register>(), TRI);
updateValueMap(I, Reg);
return true;
}
bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
const auto *Load = cast<LoadInst>(I);
if (Load->isAtomic())
return false;
if (!WebAssembly::isDefaultAddressSpace(Load->getPointerAddressSpace()))
return false;
if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy())
return false;
Address Addr;
if (!computeAddress(Load->getPointerOperand(), Addr))
return false;
// TODO: Fold a following sign-/zero-extend into the load instruction.
unsigned Opc;
const TargetRegisterClass *RC;
bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Load->getType())) {
case MVT::i1:
case MVT::i8:
Opc = A64 ? WebAssembly::LOAD8_U_I32_A64 : WebAssembly::LOAD8_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i16:
Opc = A64 ? WebAssembly::LOAD16_U_I32_A64 : WebAssembly::LOAD16_U_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i32:
Opc = A64 ? WebAssembly::LOAD_I32_A64 : WebAssembly::LOAD_I32_A32;
RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = A64 ? WebAssembly::LOAD_I64_A64 : WebAssembly::LOAD_I64_A32;
RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = A64 ? WebAssembly::LOAD_F32_A64 : WebAssembly::LOAD_F32_A32;
RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = A64 ? WebAssembly::LOAD_F64_A64 : WebAssembly::LOAD_F64_A32;
RC = &WebAssembly::F64RegClass;
break;
default:
return false;
}
materializeLoadStoreOperands(Addr);
unsigned ResultReg = createResultReg(RC);
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Load));
updateValueMap(Load, ResultReg);
return true;
}
bool WebAssemblyFastISel::selectStore(const Instruction *I) {
const auto *Store = cast<StoreInst>(I);
if (Store->isAtomic())
return false;
if (!WebAssembly::isDefaultAddressSpace(Store->getPointerAddressSpace()))
return false;
if (!Subtarget->hasSIMD128() &&
Store->getValueOperand()->getType()->isVectorTy())
return false;
Address Addr;
if (!computeAddress(Store->getPointerOperand(), Addr))
return false;
unsigned Opc;
bool VTIsi1 = false;
bool A64 = Subtarget->hasAddr64();
switch (getSimpleType(Store->getValueOperand()->getType())) {
case MVT::i1:
VTIsi1 = true;
LLVM_FALLTHROUGH;
case MVT::i8:
Opc = A64 ? WebAssembly::STORE8_I32_A64 : WebAssembly::STORE8_I32_A32;
break;
case MVT::i16:
Opc = A64 ? WebAssembly::STORE16_I32_A64 : WebAssembly::STORE16_I32_A32;
break;
case MVT::i32:
Opc = A64 ? WebAssembly::STORE_I32_A64 : WebAssembly::STORE_I32_A32;
break;
case MVT::i64:
Opc = A64 ? WebAssembly::STORE_I64_A64 : WebAssembly::STORE_I64_A32;
break;
case MVT::f32:
Opc = A64 ? WebAssembly::STORE_F32_A64 : WebAssembly::STORE_F32_A32;
break;
case MVT::f64:
Opc = A64 ? WebAssembly::STORE_F64_A64 : WebAssembly::STORE_F64_A32;
break;
default:
return false;
}
materializeLoadStoreOperands(Addr);
unsigned ValueReg = getRegForValue(Store->getValueOperand());
if (ValueReg == 0)
return false;
if (VTIsi1)
ValueReg = maskI1Value(ValueReg, Store->getValueOperand());
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store));
MIB.addReg(ValueReg);
return true;
}
bool WebAssemblyFastISel::selectBr(const Instruction *I) {
const auto *Br = cast<BranchInst>(I);
if (Br->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)];
fastEmitBranch(MSucc, Br->getDebugLoc());
return true;
}
MachineBasicBlock *TBB = FuncInfo.MBBMap[Br->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)];
bool Not;
- unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not);
if (CondReg == 0)
return false;
unsigned Opc = WebAssembly::BR_IF;
if (Not)
Opc = WebAssembly::BR_UNLESS;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addMBB(TBB)
.addReg(CondReg);
finishCondBranch(Br->getParent(), TBB, FBB);
return true;
}
bool WebAssemblyFastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
const auto *Ret = cast<ReturnInst>(I);
if (Ret->getNumOperands() == 0) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::RETURN));
return true;
}
// TODO: support multiple return in FastISel
if (Ret->getNumOperands() > 1)
return false;
Value *RV = Ret->getOperand(0);
if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy())
return false;
switch (getSimpleType(RV->getType())) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
case MVT::funcref:
case MVT::externref:
break;
default:
return false;
}
unsigned Reg;
if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt))
Reg = getRegForSignedValue(RV);
else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt))
Reg = getRegForUnsignedValue(RV);
else
Reg = getRegForValue(RV);
if (Reg == 0)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::RETURN))
.addReg(Reg);
return true;
}
bool WebAssemblyFastISel::selectUnreachable(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::UNREACHABLE));
return true;
}
bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Call:
if (selectCall(I))
return true;
break;
case Instruction::Select:
return selectSelect(I);
case Instruction::Trunc:
return selectTrunc(I);
case Instruction::ZExt:
return selectZExt(I);
case Instruction::SExt:
return selectSExt(I);
case Instruction::ICmp:
return selectICmp(I);
case Instruction::FCmp:
return selectFCmp(I);
case Instruction::BitCast:
return selectBitCast(I);
case Instruction::Load:
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
case Instruction::Br:
return selectBr(I);
case Instruction::Ret:
return selectRet(I);
case Instruction::Unreachable:
return selectUnreachable(I);
default:
break;
}
// Fall back to target-independent instruction selection.
return selectOperator(I, I->getOpcode());
}
FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
return new WebAssemblyFastISel(FuncInfo, LibInfo);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 37329b489555..eea848d3eb2f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -1,578 +1,599 @@
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
namespace {
+// Determine if a promotion alias should be created for a symbol name.
+static bool allowPromotionAlias(const std::string &Name) {
+ // Promotion aliases are used only in inline assembly, so it is safe to
+ // simply skip unusual names. This check is a subset of
+ // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar().
+ for (const char &C : Name) {
+ if (isAlnum(C) || C == '_' || C == '.')
+ continue;
+ return false;
+ }
+ return true;
+}
+
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
SetVector<GlobalValue *> &PromoteExtra) {
DenseMap<const Comdat *, Comdat *> RenamedComdats;
for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
continue;
auto Name = ExportGV.getName();
GlobalValue *ImportGV = nullptr;
if (!PromoteExtra.count(&ExportGV)) {
ImportGV = ImportM.getNamedValue(Name);
if (!ImportGV)
continue;
ImportGV->removeDeadConstantUsers();
if (ImportGV->use_empty()) {
ImportGV->eraseFromParent();
continue;
}
}
+ std::string OldName = Name.str();
std::string NewName = (Name + ModuleId).str();
if (const auto *C = ExportGV.getComdat())
if (C->getName() == Name)
RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
ExportGV.setName(NewName);
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
ExportGV.setVisibility(GlobalValue::HiddenVisibility);
if (ImportGV) {
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
}
+
+ if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
+ // Create a local alias with the original name to avoid breaking
+ // references from inline assembly.
+ std::string Alias = ".set " + OldName + "," + NewName + "\n";
+ ExportM.appendModuleInlineAsm(Alias);
+ }
}
if (!RenamedComdats.empty())
for (auto &GO : ExportM.global_objects())
if (auto *C = GO.getComdat()) {
auto Replacement = RenamedComdats.find(C);
if (Replacement != RenamedComdats.end())
GO.setComdat(Replacement->second);
}
}
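// To make the renaming concrete, suppose ModuleId is "$abc123" and ExportM
// defines an internal function @frob that ImportM uses (both names are
// hypothetical, chosen only for illustration):
//
//   before:  define internal void @frob()
//   after:   define hidden void @frob$abc123()
//            module asm ".set frob,frob$abc123"
//
// The exported definition gets external linkage and hidden visibility, the
// importing module's reference is renamed to match, and because "frob" passes
// allowPromotionAlias, the ".set" line is appended to ExportM's inline
// assembly so that existing inline-asm references to the old name still
// resolve.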
// Promote all internal (i.e. distinct) type ids used by the module by replacing
// them with external type ids formed using the module id.
//
// Note that this needs to be done before we clone the module because each clone
// will receive its own set of distinct metadata nodes.
void promoteTypeIds(Module &M, StringRef ModuleId) {
DenseMap<Metadata *, Metadata *> LocalToGlobal;
auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
Metadata *MD =
cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
Metadata *&GlobalMD = LocalToGlobal[MD];
if (!GlobalMD) {
std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
GlobalMD = MDString::get(M.getContext(), NewName);
}
CI->setArgOperand(ArgNo,
MetadataAsValue::get(M.getContext(), GlobalMD));
}
};
if (Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
for (const Use &U : TypeTestFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
ExternalizeTypeId(CI, 1);
}
}
if (Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
for (const Use &U : TypeCheckedLoadFunc->uses()) {
auto CI = cast<CallInst>(U.getUser());
ExternalizeTypeId(CI, 2);
}
}
for (GlobalObject &GO : M.global_objects()) {
SmallVector<MDNode *, 1> MDs;
GO.getMetadata(LLVMContext::MD_type, MDs);
GO.eraseMetadata(LLVMContext::MD_type);
for (auto MD : MDs) {
auto I = LocalToGlobal.find(MD->getOperand(1));
if (I == LocalToGlobal.end()) {
GO.addMetadata(LLVMContext::MD_type, *MD);
continue;
}
GO.addMetadata(
LLVMContext::MD_type,
*MDNode::get(M.getContext(), {MD->getOperand(0), I->second}));
}
}
}
// Drop unused globals, and drop type information from function declarations.
// FIXME: If we made functions typeless then there would be no need to do this.
void simplifyExternals(Module &M) {
FunctionType *EmptyFT =
FunctionType::get(Type::getVoidTy(M.getContext()), false);
for (auto I = M.begin(), E = M.end(); I != E;) {
Function &F = *I++;
if (F.isDeclaration() && F.use_empty()) {
F.eraseFromParent();
continue;
}
if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||
// Changing the type of an intrinsic may invalidate the IR.
F.getName().startswith("llvm."))
continue;
Function *NewF =
Function::Create(EmptyFT, GlobalValue::ExternalLinkage,
F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
// Only copy function attributes.
NewF->setAttributes(
AttributeList::get(M.getContext(), AttributeList::FunctionIndex,
F.getAttributes().getFnAttributes()));
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
F.eraseFromParent();
}
for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
GlobalVariable &GV = *I++;
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
continue;
}
}
}
static void
filterModule(Module *M,
function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
std::vector<GlobalValue *> V;
for (GlobalValue &GV : M->global_values())
if (!ShouldKeepDefinition(&GV))
V.push_back(&GV);
for (GlobalValue *GV : V)
if (!convertToDeclaration(*GV))
GV->eraseFromParent();
}
void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
if (auto *F = dyn_cast<Function>(C))
return Fn(F);
if (isa<GlobalValue>(C))
return;
for (Value *Op : C->operands())
forEachVirtualFunction(cast<Constant>(Op), Fn);
}
// Clone any @llvm[.compiler].used over to the new module and append
// values whose defs were cloned into that module.
static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM,
bool CompilerUsed) {
SmallVector<GlobalValue *, 4> Used, NewUsed;
// First collect those in the llvm[.compiler].used set.
collectUsedGlobalVariables(SrcM, Used, CompilerUsed);
// Next build a set of the equivalent values defined in DestM.
for (auto *V : Used) {
auto *GV = DestM.getNamedValue(V->getName());
if (GV && !GV->isDeclaration())
NewUsed.push_back(GV);
}
// Finally, add them to a llvm[.compiler].used variable in DestM.
if (CompilerUsed)
appendToCompilerUsed(DestM, NewUsed);
else
appendToUsed(DestM, NewUsed);
}
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
void splitAndWriteThinLTOBitcode(
raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter, Module &M) {
std::string ModuleId = getUniqueModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module; write it out as a
// regular LTO module with an index for summary-based dead stripping.
ProfileSummaryInfo PSI(M);
M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index);
if (ThinLinkOS)
// We don't have a ThinLTO part, but still write the module to the
// ThinLinkOS if requested so that the expected output file is produced.
WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
&Index);
return;
}
promoteTypeIds(M, ModuleId);
// Returns whether a global or its associated global has attached type
// metadata. The former may participate in CFI or whole-program
// devirtualization, so they need to appear in the merged module instead of
// the thin LTO module. Similarly, globals that are associated with globals
// with type metadata need to appear in the merged module because they will
// reference the global's section directly.
auto HasTypeMetadata = [](const GlobalObject *GO) {
if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))
if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))
if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))
if (AssocGO->hasMetadata(LLVMContext::MD_type))
return true;
return GO->hasMetadata(LLVMContext::MD_type);
};
// Collect the set of virtual functions that are eligible for virtual constant
// propagation. Each eligible function must not access memory, must return
// an integer of width <=64 bits, must take at least one argument, must not
// use its first argument (assumed to be "this") and all arguments other than
// the first one must be of <=64 bit integer type.
//
// Note that we test whether this copy of the function is readnone, rather
// than testing function attributes, which must hold for any copy of the
// function, even a less optimized version substituted at link time. This is
// sound because the virtual constant propagation optimizations effectively
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
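// As a concrete (hypothetical) illustration, a C++ virtual function such as
//
//   struct Base { virtual int tag(int Kind) const = 0; };
//   struct Impl : Base {
//     int tag(int Kind) const override { return Kind == 0 ? 3 : 7; }
//   };
//
// qualifies: it returns a 32-bit integer, takes only small integer parameters
// besides "this", never uses "this", and touches no memory, so the readnone
// check below accepts it. A body that loads from a member variable would be
// rejected.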
DenseSet<const Function *> EligibleVirtualFns;
// If any member of a comdat lives in MergedM, put all members of that
// comdat in MergedM to keep the comdat together.
DenseSet<const Comdat *> MergedMComdats;
for (GlobalVariable &GV : M.globals())
if (HasTypeMetadata(&GV)) {
if (const auto *C = GV.getComdat())
MergedMComdats.insert(C);
forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
auto *RT = dyn_cast<IntegerType>(F->getReturnType());
if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
!F->arg_begin()->use_empty())
return;
for (auto &Arg : drop_begin(F->args())) {
auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
if (!ArgT || ArgT->getBitWidth() > 64)
return;
}
if (!F->isDeclaration() &&
computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
EligibleVirtualFns.insert(F);
});
}
ValueToValueMapTy VMap;
std::unique_ptr<Module> MergedM(
CloneModule(M, VMap, [&](const GlobalValue *GV) -> bool {
if (const auto *C = GV->getComdat())
if (MergedMComdats.count(C))
return true;
if (auto *F = dyn_cast<Function>(GV))
return EligibleVirtualFns.count(F);
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
return HasTypeMetadata(GVar);
return false;
}));
StripDebugInfo(*MergedM);
MergedM->setModuleInlineAsm("");
// Clone any llvm.*used globals to ensure the included values are
// not deleted.
cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ false);
cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ true);
for (Function &F : *MergedM)
if (!F.isDeclaration()) {
// Reset the linkage of all functions eligible for virtual constant
// propagation. The canonical definitions live in the thin LTO module so
// that they can be imported.
F.setLinkage(GlobalValue::AvailableExternallyLinkage);
F.setComdat(nullptr);
}
SetVector<GlobalValue *> CfiFunctions;
for (auto &F : M)
if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))
CfiFunctions.insert(&F);
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
if (HasTypeMetadata(GVar))
return false;
if (const auto *C = GV->getComdat())
if (MergedMComdats.count(C))
return false;
return true;
});
promoteInternals(*MergedM, M, ModuleId, CfiFunctions);
promoteInternals(M, *MergedM, ModuleId, CfiFunctions);
auto &Ctx = MergedM->getContext();
SmallVector<MDNode *, 8> CfiFunctionMDs;
for (auto V : CfiFunctions) {
Function &F = *cast<Function>(V);
SmallVector<MDNode *, 2> Types;
F.getMetadata(LLVMContext::MD_type, Types);
SmallVector<Metadata *, 4> Elts;
Elts.push_back(MDString::get(Ctx, F.getName()));
CfiFunctionLinkage Linkage;
if (lowertypetests::isJumpTableCanonical(&F))
Linkage = CFL_Definition;
else if (F.hasExternalWeakLinkage())
Linkage = CFL_WeakDeclaration;
else
Linkage = CFL_Declaration;
Elts.push_back(ConstantAsMetadata::get(
llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));
append_range(Elts, Types);
CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));
}
if (!CfiFunctionMDs.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");
for (auto MD : CfiFunctionMDs)
NMD->addOperand(MD);
}
SmallVector<MDNode *, 8> FunctionAliases;
for (auto &A : M.aliases()) {
if (!isa<Function>(A.getAliasee()))
continue;
auto *F = cast<Function>(A.getAliasee());
Metadata *Elts[] = {
MDString::get(Ctx, A.getName()),
MDString::get(Ctx, F->getName()),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility())),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker())),
};
FunctionAliases.push_back(MDTuple::get(Ctx, Elts));
}
if (!FunctionAliases.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases");
for (auto MD : FunctionAliases)
NMD->addOperand(MD);
}
SmallVector<MDNode *, 8> Symvers;
ModuleSymbolTable::CollectAsmSymvers(M, [&](StringRef Name, StringRef Alias) {
Function *F = M.getFunction(Name);
if (!F || F->use_empty())
return;
Symvers.push_back(MDTuple::get(
Ctx, {MDString::get(Ctx, Name), MDString::get(Ctx, Alias)}));
});
if (!Symvers.empty()) {
NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("symvers");
for (auto MD : Symvers)
NMD->addOperand(MD);
}
simplifyExternals(*MergedM);
// FIXME: Try to re-use BSI and PFI from the original module here.
ProfileSummaryInfo PSI(M);
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
// Mark the merged module as requiring full LTO. We still want an index for
// it though, so that it can participate in summary-based dead stripping.
MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
ModuleSummaryIndex MergedMIndex =
buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
SmallVector<char, 0> Buffer;
BitcodeWriter W(Buffer);
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
// produced for the thin link.
ModuleHash ModHash = {{0}};
W.writeModule(M, /*ShouldPreserveUseListOrder=*/false, &Index,
/*GenerateHash=*/true, &ModHash);
W.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false, &MergedMIndex);
W.writeSymtab();
W.writeStrtab();
OS << Buffer;
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link will be written to the
// given OS (the merged module will be written as usual).
if (ThinLinkOS) {
Buffer.clear();
BitcodeWriter W2(Buffer);
StripDebugInfo(M);
W2.writeThinLinkBitcode(M, Index, ModHash);
W2.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false,
&MergedMIndex);
W2.writeSymtab();
W2.writeStrtab();
*ThinLinkOS << Buffer;
}
}
// Check if the LTO Unit splitting has been enabled.
bool enableSplitLTOUnit(Module &M) {
bool EnableSplitLTOUnit = false;
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("EnableSplitLTOUnit")))
EnableSplitLTOUnit = MD->getZExtValue();
return EnableSplitLTOUnit;
}
// Returns whether this module needs to be split because it uses type metadata.
bool hasTypeMetadata(Module &M) {
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
}
return false;
}
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
// See if this module has any type metadata. If so, we try to split it
// or at least promote type ids to enable WPD.
if (hasTypeMetadata(M)) {
if (enableSplitLTOUnit(M))
return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
// Promote type ids as needed for index-based WPD.
std::string ModuleId = getUniqueModuleId(&M);
if (!ModuleId.empty()) {
promoteTypeIds(M, ModuleId);
// Need to rebuild the index so that it contains type metadata
// for the newly promoted type ids.
// FIXME: Probably should not bother building the index at all
// in the caller of writeThinLTOBitcode (which does so via the
// ModuleSummaryIndexAnalysis pass), since we have to rebuild it
// anyway whenever there is type metadata (here or in
// splitAndWriteThinLTOBitcode). Just always build it once via the
// buildModuleSummaryIndex when Module(s) are ready.
ProfileSummaryInfo PSI(M);
NewIndex = std::make_unique<ModuleSummaryIndex>(
buildModuleSummaryIndex(M, nullptr, &PSI));
Index = NewIndex.get();
}
}
// Write it out as an unsplit ThinLTO module.
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
// produced for the thin link.
ModuleHash ModHash = {{0}};
WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
/*GenerateHash=*/true, &ModHash);
// If a minimized bitcode module was requested for the thin link, only
// the information that is needed by the thin link will be written to the
// given OS.
if (ThinLinkOS && Index)
WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
// The output stream on which to emit a minimized module for use
// just in the thin link, if requested.
raw_ostream *ThinLinkOS;
public:
static char ID; // Pass identification, replacement for typeid
WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
: ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ModuleSummaryIndexWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
char WriteThinLTOBitcode::ID = 0;
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
raw_ostream *ThinLinkOS) {
return new WriteThinLTOBitcode(Str, ThinLinkOS);
}
PreservedAnalyses
llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
writeThinLTOBitcode(OS, ThinLinkOS,
[&FAM](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
},
M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index be21db9087d2..e4ec5f266eb8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -1,360 +1,364 @@
//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
// Set Load/Store Alignments From Assumptions
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a ScalarEvolution-based transformation to set
// the alignments of loads, stores, and memory intrinsics based on the truth
// expressions of assume intrinsics. The primary motivation is to handle
// complex alignment assumptions that apply to vector loads and stores that
// appear after vectorization and unrolling.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
using namespace llvm;
STATISTIC(NumLoadAlignChanged,
"Number of loads changed by alignment assumptions");
STATISTIC(NumStoreAlignChanged,
"Number of stores changed by alignment assumptions");
STATISTIC(NumMemIntAlignChanged,
"Number of memory intrinsics changed by alignment assumptions");
namespace {
struct AlignmentFromAssumptions : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
AlignmentFromAssumptions() : FunctionPass(ID) {
initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
}
AlignmentFromAssumptionsPass Impl;
};
}
char AlignmentFromAssumptions::ID = 0;
static const char aip_name[] = "Alignment from assumptions";
INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
aip_name, false, false)
FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
return new AlignmentFromAssumptions();
}
// Given an expression for the (constant) alignment, AlignSCEV, and an
// expression for the displacement between a pointer and the aligned address,
// DiffSCEV, compute the alignment of the displaced pointer if it can be reduced
// to a constant. Using SCEV to compute alignment handles the case where
// DiffSCEV is a recurrence with constant start such that the aligned offset
// is constant. e.g. {16,+,32} % 32 -> 16.
static MaybeAlign getNewAlignmentDiff(const SCEV *DiffSCEV,
const SCEV *AlignSCEV,
ScalarEvolution *SE) {
// DiffUnits = Diff % int64_t(Alignment)
const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV);
LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is "
<< *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
if (const SCEVConstant *ConstDUSCEV =
dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
// If the displacement is an exact multiple of the alignment, then the
// displaced pointer has the same alignment as the aligned pointer, so
// return the alignment value.
if (!DiffUnits)
return cast<SCEVConstant>(AlignSCEV)->getValue()->getAlignValue();
// If the displacement is not an exact multiple, but the remainder is a
// constant, then return this remainder (but only if it is a power of 2).
uint64_t DiffUnitsAbs = std::abs(DiffUnits);
if (isPowerOf2_64(DiffUnitsAbs))
return Align(DiffUnitsAbs);
}
return None;
}
// There is an address given by an offset OffSCEV from AASCEV which has an
// alignment AlignSCEV. Use that information, if possible, to compute a new
// alignment for Ptr.
static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
const SCEV *OffSCEV, Value *Ptr,
ScalarEvolution *SE) {
const SCEV *PtrSCEV = SE->getSCEV(Ptr);
// On a platform with 32-bit allocas, but 64-bit flat/global pointer sizes
// (*cough* AMDGPU), the effective SCEV type of AASCEV and PtrSCEV
// may disagree. Trunc/extend so they agree.
PtrSCEV = SE->getTruncateOrZeroExtend(
PtrSCEV, SE->getEffectiveSCEVType(AASCEV->getType()));
const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
if (isa<SCEVCouldNotCompute>(DiffSCEV))
return Align(1);
// On 32-bit platforms, DiffSCEV might now have type i32 -- we've always
// sign-extended OffSCEV to i64, so make sure they agree again.
DiffSCEV = SE->getNoopOrSignExtend(DiffSCEV, OffSCEV->getType());
// What we really want to know is the overall offset to the aligned
// address. This address is displaced by the provided offset.
DiffSCEV = SE->getAddExpr(DiffSCEV, OffSCEV);
LLVM_DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to "
<< *AlignSCEV << " and offset " << *OffSCEV
<< " using diff " << *DiffSCEV << "\n");
if (MaybeAlign NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE)) {
LLVM_DEBUG(dbgs() << "\tnew alignment: " << DebugStr(NewAlignment) << "\n");
return *NewAlignment;
}
if (const SCEVAddRecExpr *DiffARSCEV = dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
// The relative offset to the alignment assumption did not yield a constant,
// but we should try harder: if we assume that a is 32-byte aligned, then in
// for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are
// 32-byte aligned; instead they alternate between 32- and 16-byte alignment.
// As a result, the new alignment will not be a constant, but can still
// be improved over the default (of 4) to 16.
const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
LLVM_DEBUG(dbgs() << "\ttrying start/inc alignment using start "
<< *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
// Now compute the new alignment using the displacement to the value in the
// first iteration, and also the alignment using the per-iteration delta.
// If these are the same, then use that answer. Otherwise, use the smaller
// one, but only if it divides the larger one.
MaybeAlign NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
MaybeAlign NewIncAlignment =
getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
LLVM_DEBUG(dbgs() << "\tnew start alignment: " << DebugStr(NewAlignment)
<< "\n");
LLVM_DEBUG(dbgs() << "\tnew inc alignment: " << DebugStr(NewIncAlignment)
<< "\n");
if (!NewAlignment || !NewIncAlignment)
return Align(1);
const Align NewAlign = *NewAlignment;
const Align NewIncAlign = *NewIncAlignment;
if (NewAlign > NewIncAlign) {
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: "
<< DebugStr(NewIncAlign) << "\n");
return NewIncAlign;
}
if (NewIncAlign > NewAlign) {
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: " << DebugStr(NewAlign)
<< "\n");
return NewAlign;
}
assert(NewIncAlign == NewAlign);
LLVM_DEBUG(dbgs() << "\tnew start/inc alignment: " << DebugStr(NewAlign)
<< "\n");
return NewAlign;
}
return Align(1);
}
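// In scalar terms, the two helpers above compute: a use pointer P is given
// alignment Align when Diff = (P - AAPtr) + Off is a multiple of Align, and
// otherwise the remainder itself when that remainder is a power of two. A
// minimal standalone sketch (the function name and use of std::optional are
// illustrative assumptions, not code from this file):
//
//   #include <cstdint>
//   #include <optional>
//
//   std::optional<uint64_t> provableAlignment(int64_t P, int64_t AAPtr,
//                                             int64_t Off, uint64_t Align) {
//     uint64_t Rem = static_cast<uint64_t>(P - AAPtr + Off) % Align; // urem
//     if (Rem == 0)
//       return Align;                     // exact multiple: full alignment
//     if ((Rem & (Rem - 1)) == 0)
//       return Rem;                       // constant power-of-two remainder
//     return std::nullopt;                // nothing provable
//   }
//
// The SCEVAddRecExpr branch extends this to displacements that are
// recurrences, e.g. {16,+,32} % 32 == 16 on every iteration.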
bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
unsigned Idx,
Value *&AAPtr,
const SCEV *&AlignSCEV,
const SCEV *&OffSCEV) {
Type *Int64Ty = Type::getInt64Ty(I->getContext());
OperandBundleUse AlignOB = I->getOperandBundleAt(Idx);
if (AlignOB.getTagName() != "align")
return false;
assert(AlignOB.Inputs.size() >= 2);
AAPtr = AlignOB.Inputs[0].get();
// TODO: Consider accumulating the offset to the base.
AAPtr = AAPtr->stripPointerCastsSameRepresentation();
AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (!isa<SCEVConstant>(AlignSCEV))
+ // Added to suppress a crash because the consumer doesn't expect
+ // non-constant alignments in the assume bundle. TODO: Consider
+ // generalizing the caller.
+ return false;
if (AlignOB.Inputs.size() == 3)
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
else
OffSCEV = SE->getZero(Int64Ty);
OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty);
return true;
}
bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
unsigned Idx) {
Value *AAPtr;
const SCEV *AlignSCEV, *OffSCEV;
if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV))
return false;
// Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
// affect other users.
if (isa<ConstantData>(AAPtr))
return false;
const SCEV *AASCEV = SE->getSCEV(AAPtr);
// Apply the assumption to all other users of the specified pointer.
SmallPtrSet<Instruction *, 32> Visited;
SmallVector<Instruction*, 16> WorkList;
for (User *J : AAPtr->users()) {
if (J == ACall)
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
WorkList.push_back(K);
}
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
LI->getPointerOperand(), SE);
if (NewAlignment > LI->getAlign()) {
LI->setAlignment(NewAlignment);
++NumLoadAlignChanged;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
SI->getPointerOperand(), SE);
if (NewAlignment > SI->getAlign()) {
SI->setAlignment(NewAlignment);
++NumStoreAlignChanged;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
if (!isValidAssumeForContext(ACall, J, DT))
continue;
Align NewDestAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE);
LLVM_DEBUG(dbgs() << "\tmem inst: " << DebugStr(NewDestAlignment)
<< "\n";);
if (NewDestAlignment > *MI->getDestAlign()) {
MI->setDestAlignment(NewDestAlignment);
++NumMemIntAlignChanged;
}
// For memory transfers, there is also a source alignment that
// can be set.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
Align NewSrcAlignment =
getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MTI->getSource(), SE);
LLVM_DEBUG(dbgs() << "\tmem trans: " << DebugStr(NewSrcAlignment)
<< "\n";);
if (NewSrcAlignment > *MTI->getSourceAlign()) {
MTI->setSourceAlignment(NewSrcAlignment);
++NumMemIntAlignChanged;
}
}
}
// Now that we've updated that use of the pointer, look for other uses of
// the pointer to update.
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
if (!Visited.count(K))
WorkList.push_back(K);
}
}
return true;
}
bool AlignmentFromAssumptions::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return Impl.runImpl(F, AC, SE, DT);
}
bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
ScalarEvolution *SE_,
DominatorTree *DT_) {
SE = SE_;
DT = DT_;
bool Changed = false;
for (auto &AssumeVH : AC.assumptions())
if (AssumeVH) {
CallInst *Call = cast<CallInst>(AssumeVH);
for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++)
Changed |= processAssumption(Call, Idx);
}
return Changed;
}
PreservedAnalyses
AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!runImpl(F, AC, &SE, &DT))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<ScalarEvolutionAnalysis>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3d60e205b002..a153f393448c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,2787 +1,2792 @@
//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form. In cases where this kicks in, it can be a significant
// performance win.
//
// If compiling for code size we avoid idiom recognition if the resulting
// code could be larger than the code for the original loop. One way this could
// happen is if the loop is not removable after idiom recognition due to the
// presence of non-idiom instructions. The initial implementation of the
// heuristics applies to idioms in multi-block loops.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
// memcmp, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
// fpowi
// Future integer operation idioms to recognize:
// ctpop
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set. It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "loop-idiom"
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
cl::desc("Options to disable Loop Idiom Recognize Pass."),
cl::location(DisableLIRP::All), cl::init(false),
cl::ReallyHidden);
bool DisableLIRP::Memset;
static cl::opt<bool, true>
DisableLIRPMemset("disable-" DEBUG_TYPE "-memset",
cl::desc("Proceed with loop idiom recognize pass, but do "
"not convert loop(s) to memset."),
cl::location(DisableLIRP::Memset), cl::init(false),
cl::ReallyHidden);
bool DisableLIRP::Memcpy;
static cl::opt<bool, true>
DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy",
cl::desc("Proceed with loop idiom recognize pass, but do "
"not convert loop(s) to memcpy."),
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"
"with -Os/-Oz"),
cl::init(true), cl::Hidden);
namespace {
class LoopIdiomRecognize {
Loop *CurLoop = nullptr;
AliasAnalysis *AA;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
std::unique_ptr<MemorySSAUpdater> MSSAU;
public:
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, MemorySSA *MSSA,
const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
bool runOnLoop(Loop *L);
private:
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
StoreListMap StoreRefsForMemset;
StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;
bool HasMemset;
bool HasMemsetPattern;
bool HasMemcpy;
/// Return code for isLegalStore()
enum LegalStoreKind {
None = 0,
Memset,
MemsetPattern,
Memcpy,
UnorderedAtomicMemcpy,
DontUse // Dummy retval never to be used. Allows catching errors in retval
// handling.
};
/// \name Countable Loop Idiom Handling
/// @{
bool runOnCountableLoop();
bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock *> &ExitBlocks);
void collectStores(BasicBlock *BB);
LegalStoreKind isLegalStore(StoreInst *SI);
enum class ForMemset { No, Yes };
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
ForMemset For);
template <typename MemInst>
bool processLoopMemIntrinsic(
BasicBlock *BB,
bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
const SCEV *BECount);
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
bool NegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
unsigned StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore,
Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv,
const SCEVAddRecExpr *LoadEv,
const SCEV *BECount);
bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);
/// @}
/// \name Noncountable Loop Idiom Handling
/// @{
bool runOnNoncountableLoop();
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction *CntInst, PHINode *CntPhi,
Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
bool IsCntPhiUsedOutsideLoop);
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
/// @}
};
class LoopIdiomRecognizeLegacyPass : public LoopPass {
public:
static char ID;
explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
initializeLoopIdiomRecognizeLegacyPassPass(
*PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (DisableLIRP::All)
return false;
if (skipLoop(L))
return false;
AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
MemorySSA *MSSA = nullptr;
if (MSSAAnalysis)
MSSA = &MSSAAnalysis->getMSSA();
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
return LIR.runOnLoop(L);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
} // end anonymous namespace
char LoopIdiomRecognizeLegacyPass::ID = 0;
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
if (DisableLIRP::All)
return PreservedAnalyses::all();
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
AR.MSSA, DL, ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)
Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
static void deleteDeadInstruction(Instruction *I) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
//===----------------------------------------------------------------------===//
//
// Implementation of LoopIdiomRecognize
//
//===----------------------------------------------------------------------===//
bool LoopIdiomRecognize::runOnLoop(Loop *L) {
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (!L->getLoopPreheader())
return false;
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy")
return false;
// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);
if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();
return runOnNoncountableLoop();
}
bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
assert(!isa<SCEVCouldNotCompute>(BECount) &&
"runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count");
// If this loop executes exactly one time, then it should be peeled, not
// optimized by this pass.
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
if (BECst->getAPInt() == 0)
return false;
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
<< "] Countable Loop %" << CurLoop->getHeader()->getName()
<< "\n");
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions that may throw.
SimpleLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(CurLoop);
if (SafetyInfo.anyBlockMayThrow())
return false;
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
if (LI->getLoopFor(BB) != CurLoop)
continue;
MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
}
return MadeChange;
}
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
return ConstStride->getAPInt();
}
/// getMemSetPatternValue - If a strided store of the specified value is safe to
/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
/// be passed in. Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
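///
/// As an illustrative sketch (hypothetical value): a store of the i32 constant
/// 0x01020304 has a 4-byte element and no single-byte splat, so the pattern
/// returned is the replicated 16-byte array
/// \code
///   [4 x i32] [i32 0x01020304, i32 0x01020304, i32 0x01020304, i32 0x01020304]
/// \endcode
/// which memset_pattern16 then tiles across the destination.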
static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
// FIXME: This could check for UndefValue because it can be merged into any
// other valid pattern.
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
if (!C)
return nullptr;
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = DL->getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
return nullptr;
// Don't care enough about darwin/ppc to implement this.
if (DL->isBigEndian())
return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
if (Size > 16)
return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16)
return C;
// Otherwise, we'll use an array of the constants.
unsigned ArraySize = 16 / Size;
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// Don't touch volatile stores.
if (SI->isVolatile())
return LegalStoreKind::None;
// We only want simple or unordered-atomic stores.
if (!SI->isUnordered())
return LegalStoreKind::None;
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return LegalStoreKind::None;
Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();
// Don't convert stores of non-integral pointer types to memsets (which store
// integers).
if (DL->isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
return LegalStoreKind::None;
// Reject stores that are so large that they overflow an unsigned.
// When storing out scalable vectors we bail out for now, since the code
// below currently only works for constant strides.
TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) ||
(SizeInBits.getFixedSize() >> 32) != 0)
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return LegalStoreKind::None;
// Check to see if we have a constant stride.
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
return LegalStoreKind::None;
// See if the store can be turned into a memset.
// If the stored value is a byte-wise value (like i32 -1), then it may be
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
// Note: memset and memset_pattern on unordered-atomic stores are not yet supported.
bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
// If we're allowed to form a memset, and the stored value would be
// acceptable for memset, use it.
if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&
// Verify that the stored value is loop invariant. If not, we can't
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
}
if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
getMemSetPatternValue(StoredVal, DL)) {
// It looks like we can use PatternValue!
return LegalStoreKind::MemsetPattern;
}
// Otherwise, see if the store can be turned into a memcpy.
if (HasMemcpy && !DisableLIRP::Memcpy) {
// Check to see if the stride matches the size of the store. If so, then we
// know that every byte is touched in the loop.
APInt Stride = getStoreStride(StoreEv);
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
if (StoreSize != Stride && StoreSize != -Stride)
return LegalStoreKind::None;
// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
// Only allow non-volatile loads
if (!LI || LI->isVolatile())
return LegalStoreKind::None;
// Only allow simple or unordered-atomic loads
if (!LI->isUnordered())
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
const SCEVAddRecExpr *LoadEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return LegalStoreKind::None;
// The store and load must share the same stride.
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
return LegalStoreKind::None;
// Success. This store can be converted into a memcpy.
UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
: LegalStoreKind::Memcpy;
}
// This store can't be transformed into a memset/memcpy.
return LegalStoreKind::None;
}
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemset.clear();
StoreRefsForMemsetPattern.clear();
StoreRefsForMemcpy.clear();
for (Instruction &I : *BB) {
StoreInst *SI = dyn_cast<StoreInst>(&I);
if (!SI)
continue;
// Make sure this is a strided store with a constant stride.
switch (isLegalStore(SI)) {
case LegalStoreKind::None:
// Nothing to do
break;
case LegalStoreKind::Memset: {
// Find the base pointer.
Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemset[Ptr].push_back(SI);
} break;
case LegalStoreKind::MemsetPattern: {
// Find the base pointer.
Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
StoreRefsForMemsetPattern[Ptr].push_back(SI);
} break;
case LegalStoreKind::Memcpy:
case LegalStoreKind::UnorderedAtomicMemcpy:
StoreRefsForMemcpy.push_back(SI);
break;
default:
assert(false && "unhandled return value");
break;
}
}
}
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(
BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock *> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
if (!DT->dominates(BB, ExitBlocks[i]))
return false;
bool MadeChange = false;
// Look for store instructions, which may be optimized to memset/memcpy.
collectStores(BB);
// Look for a single store or sets of stores with a common base, which can be
// optimized into a memset (memset_pattern). The latter most commonly happens
// with structs and hand-unrolled loops.
for (auto &SL : StoreRefsForMemset)
MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
for (auto &SL : StoreRefsForMemsetPattern)
MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
// Optimize the store into a memcpy, if it feeds a similarly strided load.
for (auto &SI : StoreRefsForMemcpy)
MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
MadeChange |= processLoopMemIntrinsic<MemSetInst>(
BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
return MadeChange;
}
/// See if this store(s) can be promoted to a memset.
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
const SCEV *BECount, ForMemset For) {
// Try to find consecutive stores that can be transformed into memsets.
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// Do a quadratic search on all of the given stores and find
// all of the pairs of stores that follow each other.
SmallVector<unsigned, 16> IndexQueue;
for (unsigned i = 0, e = SL.size(); i < e; ++i) {
assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
Value *FirstStoredVal = SL[i]->getValueOperand();
Value *FirstStorePtr = SL[i]->getPointerOperand();
const SCEVAddRecExpr *FirstStoreEv =
cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
APInt FirstStride = getStoreStride(FirstStoreEv);
unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
// See if we can optimize just this store in isolation.
if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
Heads.insert(SL[i]);
continue;
}
Value *FirstSplatValue = nullptr;
Constant *FirstPatternValue = nullptr;
if (For == ForMemset::Yes)
FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
assert((FirstSplatValue || FirstPatternValue) &&
"Expected either splat value or pattern value.");
IndexQueue.clear();
// If a store has multiple consecutive store candidates, search the Stores
// array according to the sequence: from i+1 to e, then from i-1 to 0.
// This is because pairing with the immediately succeeding or preceding
// candidate usually creates the best chance of finding a memset opportunity.
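// For example (illustrative): with i == 2 and e == 5 the visit order is
// 3, 4, 1, 0: first the candidates after SL[i] in order, then the ones
// before it in reverse order.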
unsigned j = 0;
for (j = i + 1; j < e; ++j)
IndexQueue.push_back(j);
for (j = i; j > 0; --j)
IndexQueue.push_back(j - 1);
for (auto &k : IndexQueue) {
assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
Value *SecondStorePtr = SL[k]->getPointerOperand();
const SCEVAddRecExpr *SecondStoreEv =
cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
APInt SecondStride = getStoreStride(SecondStoreEv);
if (FirstStride != SecondStride)
continue;
Value *SecondStoredVal = SL[k]->getValueOperand();
Value *SecondSplatValue = nullptr;
Constant *SecondPatternValue = nullptr;
if (For == ForMemset::Yes)
SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
assert((SecondSplatValue || SecondPatternValue) &&
"Expected either splat value or pattern value.");
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
if (For == ForMemset::Yes) {
if (isa<UndefValue>(FirstSplatValue))
FirstSplatValue = SecondSplatValue;
if (FirstSplatValue != SecondSplatValue)
continue;
} else {
if (isa<UndefValue>(FirstPatternValue))
FirstPatternValue = SecondPatternValue;
if (FirstPatternValue != SecondPatternValue)
continue;
}
Tails.insert(SL[k]);
Heads.insert(SL[i]);
ConsecutiveChain[SL[i]] = SL[k];
break;
}
}
}
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we transformed so that we don't visit the same store twice.
SmallPtrSet<Value *, 16> TransformedStores;
bool Changed = false;
// For stores that start but don't end a link in the chain:
for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to transform it.
SmallPtrSet<Instruction *, 8> AdjacentStores;
StoreInst *I = *it;
StoreInst *HeadStore = I;
unsigned StoreSize = 0;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (TransformedStores.count(I))
break;
AdjacentStores.insert(I);
StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
// Move to the next value in the chain.
I = ConsecutiveChain[I];
}
Value *StoredVal = HeadStore->getValueOperand();
Value *StorePtr = HeadStore->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
APInt Stride = getStoreStride(StoreEv);
// Check to see if the stride matches the size of the stores. If so, then
// we know that every byte is touched in the loop.
if (StoreSize != Stride && StoreSize != -Stride)
continue;
bool NegStride = StoreSize == -Stride;
if (processLoopStridedStore(StorePtr, StoreSize,
MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
BECount, NegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
}
return Changed;
}
/// processLoopMemIntrinsic - Template function for calling different processor
/// functions based on mem intrinsic type.
template <typename MemInst>
bool LoopIdiomRecognize::processLoopMemIntrinsic(
BasicBlock *BB,
bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
const SCEV *BECount) {
bool MadeChange = false;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
Instruction *Inst = &*I++;
// Look for memory instructions, which may be optimized to a larger one.
if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
WeakTrackingVH InstPtr(&*I);
if (!(this->*Processor)(MI, BECount))
continue;
MadeChange = true;
// If processing the instruction invalidated our iterator, start over from
// the top of the block.
if (!InstPtr)
I = BB->begin();
}
}
return MadeChange;
}
/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
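///
/// A sketch of the form handled here (illustrative, assuming the constant
/// memcpy length equals the stride of both pointers):
/// \code
///   for (i = 0; i != n; ++i)
///     memcpy(&A[i], &B[i], sizeof(A[0])); // widened to one memcpy of n elements
/// \endcode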
bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
const SCEV *BECount) {
// We can only handle non-volatile memcpys with a constant size.
if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
return false;
// If we're not allowed to hack on memcpy, we fail.
if ((!HasMemcpy && !isa<MemCpyInlineInst>(MCI)) || DisableLIRP::Memcpy)
return false;
Value *Dest = MCI->getDest();
Value *Source = MCI->getSource();
if (!Dest || !Source)
return false;
// See if the load and store pointer expressions are AddRec like {base,+,1} on
// the current loop, which indicates a strided load and store. If we have
// something else, it's a random load or store we can't handle.
const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return false;
// Reject memcpys that are so large that they overflow an unsigned.
uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
// Check if the stride matches the size of the memcpy. If so, then we know
// that every byte is touched in the loop.
const SCEVConstant *StoreStride =
dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
const SCEVConstant *LoadStride =
dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
if (!StoreStride || !LoadStride)
return false;
APInt StoreStrideValue = StoreStride->getAPInt();
APInt LoadStrideValue = LoadStride->getAPInt();
// Huge stride value - give up
if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
return false;
if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
<< ore::NV("Inst", "memcpy") << " in "
<< ore::NV("Function", MCI->getFunction())
<< " function will not be hoised: "
<< ore::NV("Reason", "memcpy size is not equal to stride");
});
return false;
}
int64_t StoreStrideInt = StoreStrideValue.getSExtValue();
int64_t LoadStrideInt = LoadStrideValue.getSExtValue();
// Check if the load stride matches the store stride.
if (StoreStrideInt != LoadStrideInt)
return false;
return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
MCI->getDestAlign(), MCI->getSourceAlign(),
MCI, MCI, StoreEv, LoadEv, BECount);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
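///
/// A sketch of the form handled here (illustrative, assuming the constant
/// memset length equals the stride):
/// \code
///   for (i = 0; i != n; ++i)
///     memset(&A[i], 0, sizeof(A[0])); // widened to one memset of n elements
/// \endcode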
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
const SCEV *BECount) {
// We can only handle non-volatile memsets with a constant size.
if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
return false;
// If we're not allowed to hack on memset, we fail.
if (!HasMemset || DisableLIRP::Memset)
return false;
Value *Pointer = MSI->getDest();
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if ((SizeInBytes >> 32) != 0)
return false;
// Check to see if the stride matches the size of the memset. If so, then we
// know that every byte is touched in the loop.
const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
if (!ConstStride)
return false;
APInt Stride = ConstStride->getAPInt();
if (SizeInBytes != Stride && SizeInBytes != -Stride)
return false;
// Verify that the memset value is loop invariant. If not, we can't promote
// the memset.
Value *SplatValue = MSI->getValue();
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
return false;
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(
Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access. The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
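///
/// For example (illustrative): when forming a memset over a range of A, any
/// load or store of A inside the loop other than the stores being replaced
/// makes this return true and blocks the transform.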
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
const SCEV *BECount, unsigned StoreSize,
AliasAnalysis &AA,
SmallPtrSetImpl<Instruction *> &IgnoredStores) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
LocationSize AccessSize = LocationSize::afterPointer();
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
StoreSize);
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. A store to &A[i] of 100 will always return may-alias
// with a store of &A[100]; we need StoreLoc to be "A" with size of 100,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
++BI)
for (Instruction &I : **BI)
if (IgnoredStores.count(&I) == 0 &&
isModOrRefSet(
intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
return false;
}
// If we have a negative stride, Start refers to the end of the memory location
// we're trying to memset. Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
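// For example (illustrative numbers): with Start == &A[9], BECount == 9 and
// StoreSize == 1, the loop covers A[9] down to A[0], so the recomputed base
// pointer is Start - 9 * 1 == &A[0].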
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
Type *IntPtr, unsigned StoreSize,
ScalarEvolution *SE) {
const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
if (StoreSize != 1)
Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
return SE->getMinusSCEV(Start, Index);
}
/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
/// computation in a way that will fold cleanly.
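///
/// For example (illustrative): a loop with BECount == 99 that stores 4-byte
/// elements touches (99 + 1) * 4 == 400 bytes, which is the length given to
/// the emitted memset/memcpy.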
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
unsigned StoreSize, Loop *CurLoop,
const DataLayout *DL, ScalarEvolution *SE) {
const SCEV *NumBytesS;
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
//
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
NumBytesS = SE->getZeroExtendExpr(
SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
IntPtr);
} else {
NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
SE->getOne(IntPtr), SCEV::FlagNUW);
}
// And scale it based on the store size.
if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
}
return NumBytesS;
}
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
if (!SplatValue)
PatternValue = getMemSetPatternValue(StoredVal, DL);
assert((SplatValue || PatternValue) &&
"Expected either splat value or pattern value.");
// The trip count of the loop and the base pointer of the addrec SCEV are
// guaranteed to be loop invariant, which means that they should dominate the
// header. This allows us to insert code for them in the preheader.
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
SCEVExpanderCleaner ExpCleaner(Expander, *DT);
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
if (NegStride)
Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(Start, *SE))
return Changed;
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
Value *BasePtr =
Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
// From here on out, conservatively report to the pass manager that we've
// changed the IR, even if we later clean up these added instructions. There
// may be structural differences e.g. in the order of use lists not accounted
// for in just a textual dump of the IR. This is written as a variable, even
// though statically all the places this dominates could be replaced with
// 'true', with the hope that anyone trying to be clever / "more precise" with
// the return value will read this comment, and leave them alone.
Changed = true;
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores))
return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
return Changed;
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
if (!isSafeToExpand(NumBytesS, *SE))
return Changed;
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall;
if (SplatValue) {
NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
MaybeAlign(StoreAlignment));
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
Int8PtrTy, Int8PtrTy, IntIdxTy);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
}
LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
<< " from store to: " << *Ev << " at: " << *TheStore
<< "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
NewCall->getDebugLoc(), Preheader)
<< "Transformed loop-strided store in "
<< ore::NV("Function", TheStore->getFunction())
<< " function into a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic";
});
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores) {
if (MSSAU)
MSSAU->removeMemoryAccess(I, true);
deleteDeadInstruction(I);
}
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
++NumMemSet;
ExpCleaner.markResultUsed();
return true;
}
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *BECount) {
assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
// The store must be feeding a non-volatile load.
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize,
SI->getAlign(), LI->getAlign(), SI, LI,
StoreEv, LoadEv, BECount);
}
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv,
const SCEV *BECount) {
// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
// conservatively bail here, since otherwise we may have to transform
// llvm.memcpy.inline into llvm.memcpy which is illegal.
if (isa<MemCpyInlineInst>(TheStore))
return false;
// The trip count of the loop and the base pointer of the addrec SCEV are
// guaranteed to be loop invariant, which means that they should dominate the
// header. This allows us to insert code for them in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, *DL, "loop-idiom");
SCEVExpanderCleaner ExpCleaner(Expander, *DT);
bool Changed = false;
const SCEV *StrStart = StoreEv->getStart();
unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
APInt Stride = getStoreStride(StoreEv);
bool NegStride = StoreSize == -Stride;
// Handle negative strided loops.
if (NegStride)
StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write the memory region we're storing to. This includes the load that
// feeds the stores. Check for an alias by generating the base address and
// checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
// From here on out, conservatively report to the pass manager that we've
// changed the IR, even if we later clean up these added instructions. There
// may be structural differences e.g. in the order of use lists not accounted
// for in just a textual dump of the IR. This is written as a variable, even
// though statically all the places this dominates could be replaced with
// 'true', with the hope that anyone trying to be clever / "more precise" with
// the return value will read this comment, and leave them alone.
Changed = true;
SmallPtrSet<Instruction *, 2> Stores;
Stores.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
bool UseMemMove =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
StoreSize, *AA, Stores);
if (UseMemMove) {
+ // For the memmove case it is not enough to guarantee that the loop does not
+ // access TheStore and TheLoad. Additionally we need to make sure that
+ // TheStore is the only user of TheLoad.
+ if (!TheLoad->hasOneUse())
+ return Changed;
Stores.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
BECount, StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access store location");
});
return Changed;
}
Stores.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
// Handle negative strided loops.
if (NegStride)
LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
Value *LoadBasePtr = Expander.expandCodeFor(
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
Stores.erase(TheStore);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
StoreSize, *AA, Stores)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
<< ore::NV("Inst", InstRemark) << " in "
<< ore::NV("Function", TheStore->getFunction())
<< " function will not be hoisted: "
<< ore::NV("Reason", "The loop may access load location");
});
return Changed;
}
if (UseMemMove) {
// Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
// negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
int64_t LoadOff = 0, StoreOff = 0;
const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
int64_t LoadSize =
DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
return Changed;
if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
(NegStride && LoadOff + LoadSize > StoreOff))
return Changed;
}
if (avoidLIRForMultiBlockLoop())
return Changed;
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
else
NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
LoadAlign, NumBytes);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
return Changed;
// We cannot allow unaligned ops for unordered load/store, so reject
// anything where the alignment isn't at least the element size.
assert((StoreAlign.hasValue() && LoadAlign.hasValue()) &&
"Expect unordered load/store to have align.");
if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
return Changed;
// If the element.atomic memcpy is not lowered into explicit
// loads/stores later, then it will be lowered into an element-size
// specific lib call. If the lib call doesn't exist for our store size, then
// we shouldn't generate the memcpy.
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
return Changed;
// Create the call.
// Note that unordered atomic loads/stores are *required* by the spec to
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
NumBytes, StoreSize);
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
if (MSSAU) {
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(
NewCall, nullptr, NewCall->getParent(), MemorySSA::BeforeTerminator);
MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
}
LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *TheLoad
<< "\n"
<< " from store ptr=" << *StoreEv << " at: " << *TheStore
<< "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
NewCall->getDebugLoc(), Preheader)
<< "Formed a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic from " << ore::NV("Inst", InstRemark)
<< " instruction in " << ore::NV("Function", TheStore->getFunction())
<< " function";
});
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
if (MSSAU)
MSSAU->removeMemoryAccess(TheStore, true);
deleteDeadInstruction(TheStore);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
if (UseMemMove)
++NumMemMove;
else
++NumMemCpy;
ExpCleaner.markResultUsed();
return true;
}
// When compiling for codesize we avoid idiom recognition for a multi-block loop
// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
//
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
bool IsLoopMemset) {
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {
LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
<< " : LIR " << (IsMemset ? "Memset" : "Memcpy")
<< " avoided: multi-block top-level loop\n");
return true;
}
}
return false;
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
}
/// Check if the given conditional branch is based on a comparison between
/// a variable and zero, and if the variable is non-zero (or zero, when
/// JmpOnZero is true) control flows to the loop entry. If the branch matches
/// this behavior, the variable involved in the comparison is returned. This
/// function will be called to see if the precondition and postcondition of the
/// loop are in desirable form.
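///
/// A minimal sketch of the matched form (illustrative, with JmpOnZero false):
/// \code
///   %cmp = icmp ne i32 %x, 0
///   br i1 %cmp, label %loop.entry, label %exit
/// \endcode
/// in which case %x is returned.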
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
bool JmpOnZero = false) {
if (!BI || !BI->isConditional())
return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
return nullptr;
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
if (JmpOnZero)
std::swap(TrueSucc, FalseSucc);
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
return Cond->getOperand(0);
return nullptr;
}
// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
BasicBlock *LoopEntry) {
auto *PhiX = dyn_cast<PHINode>(VarX);
if (PhiX && PhiX->getParent() == LoopEntry &&
(PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
return PhiX;
return nullptr;
}
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction counting the population bit.
/// 2) \p CntPhi is set to the corresponding phi node.
/// 3) \p Var is set to the value whose population bits are being counted.
///
/// The core idiom we are trying to detect is:
/// \code
/// if (x0 != 0)
/// goto loop-exit // the precondition of the loop
/// cnt0 = init-val;
/// do {
/// x1 = phi (x0, x2);
/// cnt1 = phi(cnt0, cnt2);
///
/// cnt2 = cnt1 + 1;
/// ...
/// x2 = x1 & (x1 - 1);
/// ...
/// } while(x2 != 0);
///
/// loop-exit:
/// \endcode
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
Instruction *&CntInst, PHINode *&CntPhi,
Value *&Var) {
// step 1: Check to see if the loop-back branch matches this pattern:
// "if (a!=0) goto loop-entry".
BasicBlock *LoopEntry;
Instruction *DefX2, *CountInst;
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
DefX2 = CountInst = nullptr;
VarX1 = VarX0 = nullptr;
PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
{
if (Value *T = matchCondition(
dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
DefX2 = dyn_cast<Instruction>(T);
else
return false;
}
// step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
{
if (!DefX2 || DefX2->getOpcode() != Instruction::And)
return false;
BinaryOperator *SubOneOp;
if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
VarX1 = DefX2->getOperand(1);
else {
VarX1 = DefX2->getOperand(0);
SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
}
if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
return false;
ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
if (!Dec ||
!((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
(SubOneOp->getOpcode() == Instruction::Add &&
Dec->isMinusOne()))) {
return false;
}
}
// step 3: Check the recurrence of variable X
PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
if (!PhiX)
return false;
// step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
{
CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
IterE = LoopEntry->end();
Iter != IterE; Iter++) {
Instruction *Inst = &*Iter;
if (Inst->getOpcode() != Instruction::Add)
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
if (!Inc || !Inc->isOne())
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
if (!Phi)
continue;
// Check if the result of the instruction is live out of the loop.
bool LiveOutLoop = false;
for (User *U : Inst->users()) {
if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true;
break;
}
}
if (LiveOutLoop) {
CountInst = Inst;
CountPhi = Phi;
break;
}
}
if (!CountInst)
return false;
}
// step 5: check if the precondition is in this form:
// "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
{
auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
return false;
CntInst = CountInst;
CntPhi = CountPhi;
Var = T;
}
return true;
}
/// Return true if the idiom is detected in the loop.
///
/// Additionally:
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
/// or nullptr if there is no such.
/// 2) \p CntPhi is set to the corresponding phi node
/// or nullptr if there is no such.
/// 3) \p Var is set to the value whose CTLZ could be used.
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
///
/// The core idiom we are trying to detect is:
/// \code
/// if (x0 == 0)
/// goto loop-exit // the precondition of the loop
/// cnt0 = init-val;
/// do {
/// x = phi (x0, x.next); //PhiX
/// cnt = phi(cnt0, cnt.next);
///
/// cnt.next = cnt + 1;
/// ...
/// x.next = x >> 1; // DefX
/// ...
/// } while(x.next != 0);
///
/// loop-exit:
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
Intrinsic::ID &IntrinID, Value *&InitX,
Instruction *&CntInst, PHINode *&CntPhi,
Instruction *&DefX) {
BasicBlock *LoopEntry;
Value *VarX = nullptr;
DefX = nullptr;
CntInst = nullptr;
CntPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
if (Value *T = matchCondition(
dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
DefX = dyn_cast<Instruction>(T);
else
return false;
// step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
if (!DefX || !DefX->isShift())
return false;
IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
Intrinsic::ctlz;
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
if (!Shft || !Shft->isOne())
return false;
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
if (!PhiX)
return false;
InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
// Make sure the initial value can't be negative otherwise the ashr in the
// loop might never reach zero which would make the loop infinite.
if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
return false;
// step 4: Find the instruction which counts the iterations: cnt.next = cnt + 1
// or cnt.next = cnt + -1.
// TODO: We can skip the step. If loop trip count is known (CTLZ),
// then all uses of "cnt.next" could be optimized to the trip count
// plus "cnt0". Currently it is not optimized.
// This step could be used to detect POPCNT instruction:
// cnt.next = cnt + (x.next & 1)
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
IterE = LoopEntry->end();
Iter != IterE; Iter++) {
Instruction *Inst = &*Iter;
if (Inst->getOpcode() != Instruction::Add)
continue;
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
if (!Phi)
continue;
CntInst = Inst;
CntPhi = Phi;
break;
}
if (!CntInst)
return false;
return true;
}
/// Recognize a CTLZ or CTTZ idiom in a non-countable loop and convert the loop
/// to a countable one (with a CTLZ / CTTZ based trip count). Returns true if a
/// CTLZ / CTTZ intrinsic is inserted as the new trip count; otherwise, returns
/// false.
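///
/// A sketch of the intended effect (illustrative, for the lshr form with a
/// precondition guaranteeing x0 != 0): the loop body executes exactly
/// bitwidth(x0) - ctlz(x0) times, so the final count is
/// \code
///   cnt0 + (bitwidth(x0) - ctlz(x0))
/// \endcode
/// which transformLoopToCountable can materialize in the preheader.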
bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
Intrinsic::ID IntrinID;
Value *InitX;
Instruction *DefX = nullptr;
PHINode *CntPhi = nullptr;
Instruction *CntInst = nullptr;
// Help decide if the transformation is profitable. For the ShiftUntilZero
// idiom, this is always 6.
size_t IdiomCanonicalSize = 6;
if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
CntInst, CntPhi, DefX))
return false;
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntPhiUsedOutsideLoop = true;
break;
}
bool IsCntInstUsedOutsideLoop = false;
for (User *U : CntInst->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
IsCntInstUsedOutsideLoop = true;
break;
}
// If both CntInst and CntPhi are used outside the loop the profitability
// is questionable.
if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
return false;
// For some CPUs the result of the CTLZ(X) intrinsic is undefined
// when X is 0. If we cannot guarantee X != 0, we need to check for this
// when expanding.
bool ZeroCheck = false;
// It is safe to assume the Preheader exists, as it was checked in the
// parent function RunOnLoop.
BasicBlock *PH = CurLoop->getLoopPreheader();
// If we are using the count instruction outside the loop, make sure we
// have a zero check as a precondition. Without the check the loop would run
// one iteration before any check of the input value. This means 0 and 1
// would have identical behavior in the original loop, while the CTLZ-based
// count would distinguish them; the zero check guarantees that case is
// never reached.
if (!IsCntPhiUsedOutsideLoop) {
auto *PreCondBB = PH->getSinglePredecessor();
if (!PreCondBB)
return false;
auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
if (!PreCondBI)
return false;
if (matchCondition(PreCondBI, PH) != InitX)
return false;
ZeroCheck = true;
}
// Check if the CTLZ / CTTZ intrinsic is profitable. Assume it is always
// profitable if we delete the loop. In its canonical form the loop has only
// 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
// %tobool = icmp eq %shr, 0
// %inc = add nsw %i.0, 1
// br i1 %tobool
const Value *Args[] = {InitX,
ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
// @llvm.dbg intrinsics don't count as they have no semantic effect.
auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
uint32_t HeaderSize =
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
InstructionCost Cost =
TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
if (HeaderSize != IdiomCanonicalSize &&
Cost > TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
IsCntPhiUsedOutsideLoop);
return true;
}
/// Recognizes a population count idiom in a non-countable loop.
///
/// If detected, transforms the relevant code to issue the popcount intrinsic
/// function call, and returns true; otherwise, returns false.
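/// For illustration only (a hypothetical source-level sketch): the idiom
/// matched here is essentially the classic clear-lowest-set-bit counting
/// loop,
/// \code
///   if (x)                                  // the precondition
///     do { cnt++; x &= x - 1; } while (x);
/// \endcode
/// which the transformation rewrites to use @llvm.ctpop, so that the final
/// cnt becomes cnt0 + popcount(x).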
bool LoopIdiomRecognize::recognizePopcount() {
if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
// Counting the population is usually done with a few arithmetic
// instructions. Such instructions can be easily "absorbed" by vacant slots
// in a non-compact loop. Therefore, recognizing the popcount idiom only
// makes sense in a compact loop.
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
BasicBlock *LoopBody = *(CurLoop->block_begin());
if (LoopBody->size() >= 20) {
// The loop is too big, bail out.
return false;
}
// It should have a preheader containing nothing but an unconditional branch.
BasicBlock *PH = CurLoop->getLoopPreheader();
if (!PH || &PH->front() != PH->getTerminator())
return false;
auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
if (!EntryBI || EntryBI->isConditional())
return false;
// It should have a precondition block where the generated popcount intrinsic
// function can be inserted.
auto *PreCondBB = PH->getSinglePredecessor();
if (!PreCondBB)
return false;
auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
if (!PreCondBI || PreCondBI->isUnconditional())
return false;
Instruction *CntInst;
PHINode *CntPhi;
Value *Val;
if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
return false;
transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
return true;
}
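// Helper that emits the population-count intrinsic. As a rough illustration
// (the exact IR depends on the type of Val), for an i32 %val this produces:
//   %0 = call i32 @llvm.ctpop.i32(i32 %val)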
static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
const DebugLoc &DL) {
Value *Ops[] = {Val};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
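// Helper that emits the find-first-set style intrinsic (ctlz or cttz). As a
// rough illustration, for IID == Intrinsic::ctlz on an i32 %val with
// ZeroCheck == true this produces:
//   %0 = call i32 @llvm.ctlz.i32(i32 %val, i1 true)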
static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
const DebugLoc &DL, bool ZeroCheck,
Intrinsic::ID IID) {
Value *Ops[] = {Val, IRBuilder.getInt1(ZeroCheck)};
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
return CI;
}
/// Transform the following loop (Using CTLZ, CTTZ is similar):
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
/// CntInst = CntPhi + 1
/// DefX = PhiX >> 1
/// LOOP_BODY
/// Br: loop if (DefX != 0)
/// Use(CntPhi) or Use(CntInst)
///
/// Into:
/// If CntPhi used outside the loop:
/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
/// Count = CountPrev + 1
/// else
/// Count = BitWidth(InitX) - CTLZ(InitX)
/// loop:
/// CntPhi = PHI [Cnt0, CntInst]
/// PhiX = PHI [InitX, DefX]
/// PhiCount = PHI [Count, Dec]
/// CntInst = CntPhi + 1
/// DefX = PhiX >> 1
/// Dec = PhiCount - 1
/// LOOP_BODY
/// Br: loop if (Dec != 0)
/// Use(CountPrev + Cnt0) // Use(CntPhi)
/// or
/// Use(Count + Cnt0) // Use(CntInst)
///
/// If LOOP_BODY is empty the loop will be deleted.
/// If CntInst and DefX are not used in LOOP_BODY they will be removed.
void LoopIdiomRecognize::transformLoopToCountable(
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
IRBuilder<> Builder(PreheaderBr);
Builder.SetCurrentDebugLocation(DL);
// If there are no uses of CntPhi create:
// Count = BitWidth - CTLZ(InitX);
// NewCount = Count;
// If there are uses of CntPhi create:
// NewCount = BitWidth - CTLZ(InitX >> 1);
// Count = NewCount + 1;
Value *InitXNext;
if (IsCntPhiUsedOutsideLoop) {
if (DefX->getOpcode() == Instruction::AShr)
InitXNext = Builder.CreateAShr(InitX, 1);
else if (DefX->getOpcode() == Instruction::LShr)
InitXNext = Builder.CreateLShr(InitX, 1);
else if (DefX->getOpcode() == Instruction::Shl) // cttz
InitXNext = Builder.CreateShl(InitX, 1);
else
llvm_unreachable("Unexpected opcode!");
} else
InitXNext = InitX;
Value *Count =
createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
Type *CountTy = Count->getType();
Count = Builder.CreateSub(
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop)
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
// If the counter was being incremented in the loop, add NewCount to the
// counter's initial value, but only if the initial value is not zero.
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero())
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
} else {
// If the count was being decremented in the loop, subtract NewCount from
// the counter's initial value.
NewCount = Builder.CreateSub(CntInitVal, NewCount);
}
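// For illustration (hypothetical values): if the counter started at 10 and
// was incremented once per iteration and the computed count is 4, the value
// used outside the loop becomes 10 + 4 = 14; for a decrementing counter it
// becomes 10 - 4 = 6.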
// Step 2: Insert new IV and loop condition:
// loop:
// ...
// PhiCount = PHI [Count, Dec]
// ...
// Dec = PhiCount - 1
// ...
// Br: loop if (Dec != 0)
BasicBlock *Body = *(CurLoop->block_begin());
auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(Builder.CreateSub(
TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));
TcPhi->addIncoming(Count, Preheader);
TcPhi->addIncoming(TcDec, Body);
CmpInst::Predicate Pred =
(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
LbCond->setPredicate(Pred);
LbCond->setOperand(0, TcDec);
LbCond->setOperand(1, ConstantInt::get(CountTy, 0));
// Step 3: All the references to the original counter outside
// the loop are replaced with the NewCount
if (IsCntPhiUsedOutsideLoop)
CntPhi->replaceUsesOutsideBlock(NewCount, Body);
else
CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 4: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
Instruction *CntInst,
PHINode *CntPhi, Value *Var) {
BasicBlock *PreHead = CurLoop->getLoopPreheader();
auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
const DebugLoc &DL = CntInst->getDebugLoc();
// Assuming before transformation, the loop is following:
// if (x) // the precondition
// do { cnt++; x &= x - 1; } while(x);
// Step 1: Insert the ctpop instruction at the end of the precondition block
IRBuilder<> Builder(PreCondBr);
Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
{
PopCnt = createPopcntIntrinsic(Builder, Var, DL);
NewCount = PopCntZext =
Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
if (NewCount != PopCnt)
(cast<Instruction>(NewCount))->setDebugLoc(DL);
// TripCnt is exactly the number of iterations the loop has
TripCnt = NewCount;
// If the population counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero()) {
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
(cast<Instruction>(NewCount))->setDebugLoc(DL);
}
}
// Step 2: Replace the precondition "if (x == 0) goto loop-exit" with
// "if (NewCount == 0) goto loop-exit". Without this change, the intrinsic
// function would be partially dead code, and downstream passes would drag
// it back from the precondition block to the preheader.
{
ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
Value *Opnd0 = PopCntZext;
Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
if (PreCond->getOperand(0) != Var)
std::swap(Opnd0, Opnd1);
ICmpInst *NewPreCond = cast<ICmpInst>(
Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
PreCondBr->setCondition(NewPreCond);
RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}
// Step 3: Note that the population count is exactly the trip count of the
// loop in question, which enables us to convert the loop from a
// noncountable loop into a countable one. The benefit is twofold:
//
// - If the loop only counts population, the entire loop becomes dead after
// the transformation. It is a lot easier to prove a countable loop dead
// than to prove a noncountable one. (In some C dialects, an infinite loop
// isn't dead even if it computes nothing useful. In general, DCE needs
// to prove a noncountable loop finite before it can safely delete it.)
//
// - If the loop also performs something else, it remains alive.
// Since it is transformed to countable form, it can be aggressively
// optimized by some optimizations which are in general not applicable
// to a noncountable loop.
//
// After this step, this loop (conceptually) would look like the following:
// newcnt = __builtin_ctpop(x);
// t = newcnt;
// if (x)
// do { cnt++; x &= x-1; t--; } while (t > 0);
BasicBlock *Body = *(CurLoop->block_begin());
{
auto *LbBr = cast<BranchInst>(Body->getTerminator());
ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
Type *Ty = TripCnt->getType();
PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
Builder.SetInsertPoint(LbCond);
Instruction *TcDec = cast<Instruction>(
Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
"tcdec", false, true));
TcPhi->addIncoming(TripCnt, PreHead);
TcPhi->addIncoming(TcDec, Body);
CmpInst::Predicate Pred =
(LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
LbCond->setPredicate(Pred);
LbCond->setOperand(0, TcDec);
LbCond->setOperand(1, ConstantInt::get(Ty, 0));
}
// Step 4: All the references to the original population counter outside
// the loop are replaced with the NewCount -- the value returned from
// __builtin_ctpop().
CntInst->replaceUsesOutsideBlock(NewCount, Body);
// step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
}
/// Match loop-invariant value.
template <typename SubPattern_t> struct match_LoopInvariant {
SubPattern_t SubPattern;
const Loop *L;
match_LoopInvariant(const SubPattern_t &SP, const Loop *L)
: SubPattern(SP), L(L) {}
template <typename ITy> bool match(ITy *V) {
return L->isLoopInvariant(V) && SubPattern.match(V);
}
};
/// Matches if the value is loop-invariant.
template <typename Ty>
inline match_LoopInvariant<Ty> m_LoopInvariant(const Ty &M, const Loop *L) {
return match_LoopInvariant<Ty>(M, L);
}
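// Example use (a sketch mirroring the matcher in the bit-test detection
// below): require that the bit mask is a loop-invariant "1 << bitpos"
// computation:
//   match(CmpLHS, m_c_And(m_Value(CurrX),
//                         m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
//                                         CurLoop)));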
/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
/// entry:
/// <...>
/// %bitmask = shl i32 1, %bitpos
/// br label %loop
///
/// loop:
/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
/// %x.next = shl i32 %x.curr, 1
/// <...>
/// br i1 %x.curr.isbitunset, label %loop, label %end
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX,
Value *&BitMask, Value *&BitPos,
Value *&CurrX, Instruction *&NextX) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-bittest idiom detection.\n");
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
return false;
}
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
using namespace PatternMatch;
// Step 1: Check if the loop backedge is in desirable form.
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
BasicBlock *TrueBB, *FalseBB;
if (!match(LoopHeaderBB->getTerminator(),
m_Br(m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)),
m_BasicBlock(TrueBB), m_BasicBlock(FalseBB)))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
return false;
}
// Step 2: Check if the backedge's condition is in desirable form.
auto MatchVariableBitMask = [&]() {
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS,
m_c_And(m_Value(CurrX),
m_CombineAnd(
m_Value(BitMask),
m_LoopInvariant(m_Shl(m_One(), m_Value(BitPos)),
CurLoop))));
};
auto MatchConstantBitMask = [&]() {
return ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero()) &&
match(CmpLHS, m_And(m_Value(CurrX),
m_CombineAnd(m_Value(BitMask), m_Power2()))) &&
(BitPos = ConstantExpr::getExactLogBase2(cast<Constant>(BitMask)));
};
auto MatchDecomposableConstantBitMask = [&]() {
APInt Mask;
return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) &&
ICmpInst::isEquality(Pred) && Mask.isPowerOf2() &&
(BitMask = ConstantInt::get(CurrX->getType(), Mask)) &&
(BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2()));
};
if (!MatchVariableBitMask() && !MatchConstantBitMask() &&
!MatchDecomposableConstantBitMask()) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge comparison.\n");
return false;
}
// Step 3: Check if the recurrence is in desirable form.
auto *CurrXPN = dyn_cast<PHINode>(CurrX);
if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
return false;
}
BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);
NextX =
dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));
assert(CurLoop->isLoopInvariant(BaseX) &&
"Expected BaseX to be avaliable in the preheader!");
if (!NextX || !match(NextX, m_Shl(m_Specific(CurrX), m_One()))) {
// FIXME: support right-shift?
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
return false;
}
// Step 4: Check if the backedge's destinations are in desirable form.
assert(ICmpInst::isEquality(Pred) &&
"Should only get equality predicates here.");
// cmp-br is commutative, so canonicalize to a single variant.
if (Pred != ICmpInst::Predicate::ICMP_EQ) {
Pred = ICmpInst::getInversePredicate(Pred);
std::swap(TrueBB, FalseBB);
}
// We expect to exit loop when comparison yields false,
// so when it yields true we should branch back to loop header.
if (TrueBB != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
return false;
}
// Okay, idiom checks out.
return true;
}
/// Look for the following loop:
/// \code
/// entry:
/// <...>
/// %bitmask = shl i32 1, %bitpos
/// br label %loop
///
/// loop:
/// %x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
/// %x.curr.bitmasked = and i32 %x.curr, %bitmask
/// %x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
/// %x.next = shl i32 %x.curr, 1
/// <...>
/// br i1 %x.curr.isbitunset, label %loop, label %end
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
///
/// And transform it into:
/// \code
/// entry:
/// %bitmask = shl i32 1, %bitpos
/// %lowbitmask = add i32 %bitmask, -1
/// %mask = or i32 %lowbitmask, %bitmask
/// %x.masked = and i32 %x, %mask
/// %x.masked.numleadingzeros = call i32 @llvm.ctlz.i32(i32 %x.masked,
/// i1 true)
/// %x.masked.numactivebits = sub i32 32, %x.masked.numleadingzeros
/// %x.masked.leadingonepos = add i32 %x.masked.numactivebits, -1
/// %backedgetakencount = sub i32 %bitpos, %x.masked.leadingonepos
/// %tripcount = add i32 %backedgetakencount, 1
/// %x.curr = shl i32 %x, %backedgetakencount
/// %x.next = shl i32 %x, %tripcount
/// br label %loop
///
/// loop:
/// %loop.iv = phi i32 [ 0, %entry ], [ %loop.iv.next, %loop ]
/// %loop.iv.next = add nuw i32 %loop.iv, 1
/// %loop.ivcheck = icmp eq i32 %loop.iv.next, %tripcount
/// <...>
/// br i1 %loop.ivcheck, label %end, label %loop
///
/// end:
/// %x.curr.res = phi i32 [ %x.curr, %loop ] <...>
/// %x.next.res = phi i32 [ %x.next, %loop ] <...>
/// <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
bool MadeChange = false;
Value *X, *BitMask, *BitPos, *XCurr;
Instruction *XNext;
if (!detectShiftUntilBitTestIdiom(CurLoop, X, BitMask, BitPos, XCurr,
XNext)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" shift-until-bittest idiom detection failed.\n");
return MadeChange;
}
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom detected!\n");
// Ok, it is the idiom we were looking for; we *could* transform this loop,
// but is it profitable to transform?
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
assert(SuccessorBB && "There is only a single successor.");
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
Intrinsic::ID IntrID = Intrinsic::ctlz;
Type *Ty = X->getType();
unsigned Bitwidth = Ty->getScalarSizeInBits();
TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_SizeAndLatency;
// The rewrite is considered to be unprofitable if and only if the
// intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Intrinsic is too costly, not beneficial\n");
return MadeChange;
}
if (TTI->getArithmeticInstrCost(Instruction::Shl, Ty, CostKind) >
TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Shift is too costly, not beneficial\n");
return MadeChange;
}
// Ok, transform appears worthwhile.
MadeChange = true;
// Step 1: Compute the loop trip count.
Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
BitPos->getName() + ".lowbitmask");
Value *Mask =
Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
/*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
Value *XMaskedNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
XMasked->getName() + ".numactivebits", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
Value *XMaskedLeadingOnePos =
Builder.CreateAdd(XMaskedNumActiveBits, Constant::getAllOnesValue(Ty),
XMasked->getName() + ".leadingonepos", /*HasNUW=*/false,
/*HasNSW=*/Bitwidth > 2);
Value *LoopBackedgeTakenCount = Builder.CreateSub(
BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",
/*HasNUW=*/true, /*HasNSW=*/true);
// We know loop's backedge-taken count, but what's loop's trip count?
// Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
Value *LoopTripCount =
Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
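// For illustration (hypothetical values): with i32 X = 0b10110 and
// BitPos = 6, Mask = 0b1111111 and XMasked = 0b10110, whose leading-one
// position is 4, so the backedge-taken count is 6 - 4 = 2 and the trip count
// is 3: the header runs for X, X << 1 and X << 2, exiting once bit 6 becomes
// set.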
// Step 2: Compute the recurrence's final value without a loop.
// NewX is always safe to compute, because `LoopBackedgeTakenCount`
// will always be smaller than `bitwidth(X)`, i.e. we never get poison.
Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);
NewX->takeName(XCurr);
if (auto *I = dyn_cast<Instruction>(NewX))
I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
Value *NewXNext;
// Rewriting XNext is more complicated, however, because `X << LoopTripCount`
// will be poison iff `LoopTripCount == bitwidth(X)` (which will happen
// iff `BitPos` is `bitwidth(x) - 1` and `X` is `1`). So unless we know
// that isn't the case, we'll need to emit an alternative, safe IR.
if (XNext->hasNoSignedWrap() || XNext->hasNoUnsignedWrap() ||
PatternMatch::match(
BitPos, PatternMatch::m_SpecificInt_ICMP(
ICmpInst::ICMP_NE, APInt(Ty->getScalarSizeInBits(),
Ty->getScalarSizeInBits() - 1))))
NewXNext = Builder.CreateShl(X, LoopTripCount);
else {
// Otherwise, just additionally shift by one. It's the smallest solution;
// alternatively, we could check that NewX is INT_MIN (or that BitPos is
// bitwidth(x) - 1) and select 0 instead.
NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));
}
NewXNext->takeName(XNext);
if (auto *I = dyn_cast<Instruction>(NewXNext))
I->copyIRFlags(XNext, /*IncludeWrapFlags=*/true);
// Step 3: Adjust the successor basic block to receive the computed
// recurrence's final value instead of the recurrence itself.
XCurr->replaceUsesOutsideBlock(NewX, LoopHeaderBB);
XNext->replaceUsesOutsideBlock(NewXNext, LoopHeaderBB);
// Step 4: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
Builder.SetInsertPoint(&LoopHeaderBB->front());
auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
// Note that NUW is always safe, while NSW is only safe for bitwidths != 2.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
auto *IVNext =
Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
IV->addIncoming(IVNext, LoopHeaderBB);
// Step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
// Other passes will take care of actually deleting the loop if possible.
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-bittest idiom optimized!\n");
++NumShiftUntilBitTest;
return MadeChange;
}
/// Return true if the idiom is detected in the loop.
///
/// The core idiom we are trying to detect is:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// br label %for.cond
///
/// loop:
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
/// %iv.next = add i8 %iv, 1
/// <...>
/// br i1 %val.shifted.iszero, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv, %loop ] <...>
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
/// <...>
/// \endcode
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
Instruction *&ValShiftedIsZero,
Intrinsic::ID &IntrinID, Instruction *&IV,
Value *&Start, Value *&Val,
const SCEV *&ExtraOffsetExpr,
bool &InvertedCond) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Performing shift-until-zero idiom detection.\n");
// Give up if the loop has multiple blocks or multiple backedges.
if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
return false;
}
Instruction *ValShifted, *NBits, *IVNext;
Value *ExtraOffset;
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
using namespace PatternMatch;
// Step 1: Check if the loop backedge and its condition are in desirable form.
ICmpInst::Predicate Pred;
BasicBlock *TrueBB, *FalseBB;
if (!match(LoopHeaderBB->getTerminator(),
m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
m_BasicBlock(FalseBB))) ||
!match(ValShiftedIsZero,
m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
!ICmpInst::isEquality(Pred)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
return false;
}
// Step 2: Check if the comparison's operand is in desirable form.
// FIXME: Val could be a one-input PHI node, which we should look past.
if (!match(ValShifted, m_Shift(m_LoopInvariant(m_Value(Val), CurLoop),
m_Instruction(NBits)))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
return false;
}
IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz
: Intrinsic::ctlz;
// Step 3: Check if the shift amount is in desirable form.
if (match(NBits, m_c_Add(m_Instruction(IV),
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
(NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
else if (match(NBits,
m_Sub(m_Instruction(IV),
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
NBits->hasNoSignedWrap())
ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
else {
IV = NBits;
ExtraOffsetExpr = SE->getZero(NBits->getType());
}
// Step 4: Check if the recurrence is in desirable form.
auto *IVPN = dyn_cast<PHINode>(IV);
if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
return false;
}
Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
return false;
}
// Step 5: Check if the backedge's destinations are in desirable form.
assert(ICmpInst::isEquality(Pred) &&
"Should only get equality predicates here.");
// cmp-br is commutative, so canonicalize to a single variant.
InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
if (InvertedCond) {
Pred = ICmpInst::getInversePredicate(Pred);
std::swap(TrueBB, FalseBB);
}
// We expect to exit loop when comparison yields true,
// so when it yields false we should branch back to loop header.
if (FalseBB != LoopHeaderBB) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
return false;
}
// The new, countable, loop will certainly only run a known number of
// iterations; it won't be infinite. But the old loop might be infinite
// under certain conditions. For logical shifts, the value will become zero
// after at most bitwidth(%Val) loop iterations. However, for an arithmetic
// right-shift, if the sign bit was set, the value will never become zero,
// and the loop may never finish.
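// For illustration: with an i8 %val of 0x80 the sign bit is set, so
// "ashr i8 %val, %nbits" stays negative (eventually sticking at -1) for
// every in-range shift amount and never reaches zero.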
if (ValShifted->getOpcode() == Instruction::AShr &&
!isMustProgress(CurLoop) && !SE->isKnownNonNegative(SE->getSCEV(Val))) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Can not prove the loop is finite.\n");
return false;
}
// Okay, idiom checks out.
return true;
}
/// Look for the following loop:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// br label %for.cond
///
/// loop:
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
/// %iv.next = add i8 %iv, 1
/// <...>
/// br i1 %val.shifted.iszero, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv, %loop ] <...>
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
/// <...>
/// \endcode
///
/// And transform it into:
/// \code
/// entry:
/// <...>
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// %val.numleadingzeros = call i8 @llvm.ct{l,t}z.i8(i8 %val, i1 0)
/// %val.numactivebits = sub i8 8, %val.numleadingzeros
/// %extraoffset.neg = sub i8 0, %extraoffset
/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
/// %loop.tripcount = sub i8 %iv.final, %start
/// br label %loop
///
/// loop:
/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
/// %loop.iv.next = add i8 %loop.iv, 1
/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
/// %iv = add i8 %loop.iv, %start
/// <...>
/// br i1 %loop.ivcheck, label %end, label %loop
///
/// end:
/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
/// <...>
/// \endcode
bool LoopIdiomRecognize::recognizeShiftUntilZero() {
bool MadeChange = false;
Instruction *ValShiftedIsZero;
Intrinsic::ID IntrID;
Instruction *IV;
Value *Start, *Val;
const SCEV *ExtraOffsetExpr;
bool InvertedCond;
if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IntrID, IV,
Start, Val, ExtraOffsetExpr, InvertedCond)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" shift-until-zero idiom detection failed.\n");
return MadeChange;
}
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
// Ok, it is the idiom we were looking for; we *could* transform this loop,
// but is it profitable to transform?
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
assert(LoopPreheaderBB && "There is always a loop preheader.");
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
assert(SuccessorBB && "There is only a single successor.");
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
Builder.SetCurrentDebugLocation(IV->getDebugLoc());
Type *Ty = Val->getType();
unsigned Bitwidth = Ty->getScalarSizeInBits();
TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_SizeAndLatency;
// The rewrite is considered to be unprofitable if and only if the
// intrinsic we'll use is not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getFalse()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
" Intrinsic is too costly, not beneficial\n");
return MadeChange;
}
// Ok, transform appears worthwhile.
MadeChange = true;
bool OffsetIsZero = false;
if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
OffsetIsZero = ExtraOffsetExprC->isZero();
// Step 1: Compute the loop's final IV value / trip count.
CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
IntrID, Ty, {Val, /*is_zero_undef=*/Builder.getFalse()},
/*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
Value *ValNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
Val->getName() + ".numactivebits", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
SCEVExpander Expander(*SE, *DL, "loop-idiom");
Expander.setInsertPoint(&*Builder.GetInsertPoint());
Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
Value *ValNumActiveBitsOffset = Builder.CreateAdd(
ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
{ValNumActiveBitsOffset, Start},
/*FMFSource=*/nullptr, "iv.final");
auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
// FIXME: or when the offset was `add nuw`
// We know loop's backedge-taken count, but what's loop's trip count?
Value *LoopTripCount =
Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
/*HasNSW=*/Bitwidth != 2);
// Step 2: Adjust the successor basic block to receive the original
// induction variable's final value instead of the original IV itself.
IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
// Step 3: Rewrite the loop into a countable form, with canonical IV.
// The new canonical induction variable.
Builder.SetInsertPoint(&LoopHeaderBB->front());
auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
// The induction itself.
Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI());
auto *CIVNext =
Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
// The loop trip count check.
auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
auto *NewIVCheck = CIVCheck;
if (InvertedCond) {
NewIVCheck = Builder.CreateNot(CIVCheck);
NewIVCheck->takeName(ValShiftedIsZero);
}
// The original IV, but rebased to be an offset to the CIV.
auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
/*HasNSW=*/true); // FIXME: what about NUW?
IVDePHId->takeName(IV);
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
CIV->addIncoming(CIVNext, LoopHeaderBB);
// Step 4: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);
// Step 5: Try to cleanup the loop's body somewhat.
IV->replaceAllUsesWith(IVDePHId);
IV->eraseFromParent();
ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
ValShiftedIsZero->eraseFromParent();
// Other passes will take care of actually deleting the loop if possible.
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
++NumShiftUntilZero;
return MadeChange;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
index 6cf5d9285b90..0096a3d44d85 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -1,222 +1,222 @@
//===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements export of code coverage data to lcov trace file format.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//
// The trace file code coverage export uses the following format (see also
// https://linux.die.net/man/1/geninfo). Each quoted string appears on its own
// line; the indentation shown here is only for documentation purposes.
//
// - for each source file:
// - "SF:<absolute path to source file>"
// - for each function:
// - "FN:<line number of function start>,<function name>"
// - for each function:
// - "FNDA:<execution count>,<function name>"
// - "FNF:<number of functions found>"
// - "FNH:<number of functions hit>"
// - for each instrumented line:
// - "DA:<line number>,<execution count>[,<checksum>]
// - for each branch:
// - "BRDA:<line number>,<branch pair id>,<branch id>,<count>"
// - "BRF:<number of branches found>"
// - "BRH:<number of branches hit>"
// - "LH:<number of lines with non-zero execution count>"
// - "LF:<number of instrumented lines>"
// - "end_of_record"
//
// If the user is exporting summary information only, then the FN, FNDA, and DA
// lines will not be present.
//
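// As a rough illustration (hypothetical file name and counts), a
// summary-only record therefore has the shape:
//
//   SF:/tmp/example.c
//   FNF:1
//   FNH:1
//   BRF:2
//   BRH:1
//   LF:10
//   LH:8
//   end_of_record
//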
//===----------------------------------------------------------------------===//
#include "CoverageExporterLcov.h"
#include "CoverageReport.h"
using namespace llvm;
namespace {
void renderFunctionSummary(raw_ostream &OS,
const FileCoverageSummary &Summary) {
OS << "FNF:" << Summary.FunctionCoverage.getNumFunctions() << '\n'
<< "FNH:" << Summary.FunctionCoverage.getExecuted() << '\n';
}
void renderFunctions(
raw_ostream &OS,
const iterator_range<coverage::FunctionRecordIterator> &Functions) {
for (const auto &F : Functions) {
auto StartLine = F.CountedRegions.front().LineStart;
OS << "FN:" << StartLine << ',' << F.Name << '\n';
}
for (const auto &F : Functions)
OS << "FNDA:" << F.ExecutionCount << ',' << F.Name << '\n';
}
void renderLineExecutionCounts(raw_ostream &OS,
const coverage::CoverageData &FileCoverage) {
coverage::LineCoverageIterator LCI{FileCoverage, 1};
coverage::LineCoverageIterator LCIEnd = LCI.getEnd();
for (; LCI != LCIEnd; ++LCI) {
const coverage::LineCoverageStats &LCS = *LCI;
if (LCS.isMapped()) {
OS << "DA:" << LCS.getLine() << ',' << LCS.getExecutionCount() << '\n';
}
}
}
std::vector<llvm::coverage::CountedRegion>
collectNestedBranches(const coverage::CoverageMapping &Coverage,
ArrayRef<llvm::coverage::ExpansionRecord> Expansions,
int ViewDepth = 0, int SrcLine = 0) {
std::vector<llvm::coverage::CountedRegion> Branches;
for (const auto &Expansion : Expansions) {
auto ExpansionCoverage = Coverage.getCoverageForExpansion(Expansion);
// If we're at the top level, set the corresponding source line.
if (ViewDepth == 0)
SrcLine = Expansion.Region.LineStart;
// Recursively collect branches from nested expansions.
auto NestedExpansions = ExpansionCoverage.getExpansions();
auto NestedExBranches = collectNestedBranches(Coverage, NestedExpansions,
ViewDepth + 1, SrcLine);
append_range(Branches, NestedExBranches);
// Add branches from this level of expansion.
auto ExBranches = ExpansionCoverage.getBranches();
for (auto B : ExBranches)
if (B.FileID == Expansion.FileID) {
B.LineStart = SrcLine;
Branches.push_back(B);
}
}
return Branches;
}
bool sortLine(llvm::coverage::CountedRegion I,
llvm::coverage::CountedRegion J) {
return (I.LineStart < J.LineStart) ||
((I.LineStart == J.LineStart) && (I.ColumnStart < J.ColumnStart));
}
void renderBranchExecutionCounts(raw_ostream &OS,
const coverage::CoverageMapping &Coverage,
const coverage::CoverageData &FileCoverage) {
std::vector<llvm::coverage::CountedRegion> Branches =
FileCoverage.getBranches();
// Recursively collect branches for all file expansions.
std::vector<llvm::coverage::CountedRegion> ExBranches =
collectNestedBranches(Coverage, FileCoverage.getExpansions());
// Append Expansion Branches to Source Branches.
append_range(Branches, ExBranches);
// Sort branches based on line number to ensure branches corresponding to the
// same source line are counted together.
llvm::sort(Branches, sortLine);
auto NextBranch = Branches.begin();
auto EndBranch = Branches.end();
// Branches with the same source line are enumerated individually
// (BranchIndex) as well as based on True/False pairs (PairIndex).
while (NextBranch != EndBranch) {
unsigned CurrentLine = NextBranch->LineStart;
unsigned PairIndex = 0;
unsigned BranchIndex = 0;
while (NextBranch != EndBranch && CurrentLine == NextBranch->LineStart) {
if (!NextBranch->Folded) {
unsigned BC1 = NextBranch->ExecutionCount;
unsigned BC2 = NextBranch->FalseExecutionCount;
bool BranchNotExecuted = (BC1 == 0 && BC2 == 0);
for (int I = 0; I < 2; I++, BranchIndex++) {
OS << "BRDA:" << CurrentLine << ',' << PairIndex << ','
<< BranchIndex;
if (BranchNotExecuted)
OS << ',' << '-' << '\n';
else
OS << ',' << (I == 0 ? BC1 : BC2) << '\n';
}
PairIndex++;
}
NextBranch++;
}
}
}
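// For illustration (hypothetical counts): two branch conditions on source
// line 10, where the first was taken 5 times true and 0 times false and the
// second never executed, are emitted as:
//   BRDA:10,0,0,5
//   BRDA:10,0,1,0
//   BRDA:10,1,2,-
//   BRDA:10,1,3,-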
void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "LF:" << Summary.LineCoverage.getNumLines() << '\n'
<< "LH:" << Summary.LineCoverage.getCovered() << '\n';
}
void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
- << "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
+ << "BRH:" << Summary.BranchCoverage.getCovered() << '\n';
}
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
const std::string &Filename,
const FileCoverageSummary &FileReport, bool ExportSummaryOnly,
bool SkipFunctions) {
OS << "SF:" << Filename << '\n';
if (!ExportSummaryOnly && !SkipFunctions) {
renderFunctions(OS, Coverage.getCoveredFunctions(Filename));
}
renderFunctionSummary(OS, FileReport);
if (!ExportSummaryOnly) {
// Calculate and render detailed coverage information for the given file.
auto FileCoverage = Coverage.getCoverageForFile(Filename);
renderLineExecutionCounts(OS, FileCoverage);
renderBranchExecutionCounts(OS, Coverage, FileCoverage);
}
renderBranchSummary(OS, FileReport);
renderLineSummary(OS, FileReport);
OS << "end_of_record\n";
}
void renderFiles(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
ArrayRef<std::string> SourceFiles,
ArrayRef<FileCoverageSummary> FileReports,
bool ExportSummaryOnly, bool SkipFunctions) {
for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
renderFile(OS, Coverage, SourceFiles[I], FileReports[I], ExportSummaryOnly,
SkipFunctions);
}
} // end anonymous namespace
void CoverageExporterLcov::renderRoot(const CoverageFilters &IgnoreFilters) {
std::vector<std::string> SourceFiles;
for (StringRef SF : Coverage.getUniqueSourceFiles()) {
if (!IgnoreFilters.matchesFilename(SF))
SourceFiles.emplace_back(SF);
}
renderRoot(SourceFiles);
}
void CoverageExporterLcov::renderRoot(ArrayRef<std::string> SourceFiles) {
FileCoverageSummary Totals = FileCoverageSummary("Totals");
auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
SourceFiles, Options);
renderFiles(OS, Coverage, SourceFiles, FileReports, Options.ExportSummaryOnly,
Options.SkipFunctions);
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 48ae92f734c7..9d461b08f3f8 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1,2693 +1,2698 @@
//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program is a utility that works like binutils "objdump", that is, it
// dumps out a plethora of information about an object file depending on the
// flags.
//
// The flags and output of this program should be near identical to those of
// binutils objdump.
//
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
#include "COFFDump.h"
#include "ELFDump.h"
#include "MachODump.h"
#include "ObjdumpOptID.h"
#include "SourcePrinter.h"
#include "WasmDump.h"
#include "XCOFFDump.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <system_error>
#include <unordered_map>
#include <utility>
using namespace llvm;
using namespace llvm::object;
using namespace llvm::objdump;
using namespace llvm::opt;
namespace {
class CommonOptTable : public opt::OptTable {
public:
CommonOptTable(ArrayRef<Info> OptionInfos, const char *Usage,
const char *Description)
: OptTable(OptionInfos), Usage(Usage), Description(Description) {
setGroupedShortOptions(true);
}
void printHelp(StringRef Argv0, bool ShowHidden = false) const {
Argv0 = sys::path::filename(Argv0);
opt::OptTable::printHelp(outs(), (Argv0 + Usage).str().c_str(), Description,
ShowHidden, ShowHidden);
// TODO Replace this with OptTable API once it adds extrahelp support.
outs() << "\nPass @FILE as argument to read options from FILE.\n";
}
private:
const char *Usage;
const char *Description;
};
// ObjdumpOptID is in ObjdumpOptID.h
#define PREFIX(NAME, VALUE) const char *const OBJDUMP_##NAME[] = VALUE;
#include "ObjdumpOpts.inc"
#undef PREFIX
static constexpr opt::OptTable::Info ObjdumpInfoTable[] = {
#define OBJDUMP_nullptr nullptr
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OBJDUMP_##PREFIX, NAME, HELPTEXT, \
METAVAR, OBJDUMP_##ID, opt::Option::KIND##Class, \
PARAM, FLAGS, OBJDUMP_##GROUP, \
OBJDUMP_##ALIAS, ALIASARGS, VALUES},
#include "ObjdumpOpts.inc"
#undef OPTION
#undef OBJDUMP_nullptr
};
class ObjdumpOptTable : public CommonOptTable {
public:
ObjdumpOptTable()
: CommonOptTable(ObjdumpInfoTable, " [options] <input object files>",
"llvm object file dumper") {}
};
enum OtoolOptID {
OTOOL_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
OTOOL_##ID,
#include "OtoolOpts.inc"
#undef OPTION
};
#define PREFIX(NAME, VALUE) const char *const OTOOL_##NAME[] = VALUE;
#include "OtoolOpts.inc"
#undef PREFIX
static constexpr opt::OptTable::Info OtoolInfoTable[] = {
#define OTOOL_nullptr nullptr
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OTOOL_##PREFIX, NAME, HELPTEXT, \
METAVAR, OTOOL_##ID, opt::Option::KIND##Class, \
PARAM, FLAGS, OTOOL_##GROUP, \
OTOOL_##ALIAS, ALIASARGS, VALUES},
#include "OtoolOpts.inc"
#undef OPTION
#undef OTOOL_nullptr
};
class OtoolOptTable : public CommonOptTable {
public:
OtoolOptTable()
: CommonOptTable(OtoolInfoTable, " [option...] [file...]",
"Mach-O object file displaying tool") {}
};
} // namespace
#define DEBUG_TYPE "objdump"
static uint64_t AdjustVMA;
static bool AllHeaders;
static std::string ArchName;
bool objdump::ArchiveHeaders;
bool objdump::Demangle;
bool objdump::Disassemble;
bool objdump::DisassembleAll;
bool objdump::SymbolDescription;
static std::vector<std::string> DisassembleSymbols;
static bool DisassembleZeroes;
static std::vector<std::string> DisassemblerOptions;
DIDumpType objdump::DwarfDumpType;
static bool DynamicRelocations;
static bool FaultMapSection;
static bool FileHeaders;
bool objdump::SectionContents;
static std::vector<std::string> InputFilenames;
bool objdump::PrintLines;
static bool MachOOpt;
std::string objdump::MCPU;
std::vector<std::string> objdump::MAttrs;
bool objdump::ShowRawInsn;
bool objdump::LeadingAddr;
static bool RawClangAST;
bool objdump::Relocations;
bool objdump::PrintImmHex;
bool objdump::PrivateHeaders;
std::vector<std::string> objdump::FilterSections;
bool objdump::SectionHeaders;
static bool ShowLMA;
bool objdump::PrintSource;
static uint64_t StartAddress;
static bool HasStartAddressFlag;
static uint64_t StopAddress = UINT64_MAX;
static bool HasStopAddressFlag;
bool objdump::SymbolTable;
static bool SymbolizeOperands;
static bool DynamicSymbolTable;
std::string objdump::TripleName;
bool objdump::UnwindInfo;
static bool Wide;
std::string objdump::Prefix;
uint32_t objdump::PrefixStrip;
DebugVarsFormat objdump::DbgVariables = DVDisabled;
int objdump::DbgIndent = 52;
static StringSet<> DisasmSymbolSet;
StringSet<> objdump::FoundSectionSet;
static StringRef ToolName;
namespace {
struct FilterResult {
// True if the section should not be skipped.
bool Keep;
// True if the index counter should be incremented, even if the section should
// be skipped. For example, sections may be skipped if they are not included
// in the --section flag, but we still want those to count toward the section
// count.
bool IncrementIndex;
};
} // namespace
static FilterResult checkSectionFilter(object::SectionRef S) {
if (FilterSections.empty())
return {/*Keep=*/true, /*IncrementIndex=*/true};
Expected<StringRef> SecNameOrErr = S.getName();
if (!SecNameOrErr) {
consumeError(SecNameOrErr.takeError());
return {/*Keep=*/false, /*IncrementIndex=*/false};
}
StringRef SecName = *SecNameOrErr;
// StringSet does not allow an empty key, so avoid adding sections with
// no name (such as the section with index 0) here.
if (!SecName.empty())
FoundSectionSet.insert(SecName);
// Only show the section if it's in the FilterSections list, but always
// increment so the indexing is stable.
return {/*Keep=*/is_contained(FilterSections, SecName),
/*IncrementIndex=*/true};
}
SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O,
uint64_t *Idx) {
// Start at UINT64_MAX so that the first index returned after an increment is
// zero (after the unsigned wrap).
if (Idx)
*Idx = UINT64_MAX;
return SectionFilter(
[Idx](object::SectionRef S) {
FilterResult Result = checkSectionFilter(S);
if (Idx != nullptr && Result.IncrementIndex)
*Idx += 1;
return Result.Keep;
},
O);
}
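// Typical use (a sketch): iterate only the sections selected by -j/--section
// while keeping a stable section index across skipped sections:
//   uint64_t Idx;
//   for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
//     // ... Idx reflects the section's position within the object file ...
//   }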
std::string objdump::getFileNameForError(const object::Archive::Child &C,
unsigned Index) {
Expected<StringRef> NameOrErr = C.getName();
if (NameOrErr)
return std::string(NameOrErr.get());
// If we have an error getting the name then we print the index of the archive
// member. Since we are already in an error state, we just ignore this error.
consumeError(NameOrErr.takeError());
return "<file index: " + std::to_string(Index) + ">";
}
void objdump::reportWarning(const Twine &Message, StringRef File) {
// Output order between errs() and outs() matters especially for archive
// files where the output is per member object.
outs().flush();
WithColor::warning(errs(), ToolName)
<< "'" << File << "': " << Message << "\n";
}
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
const Twine &Message) {
outs().flush();
WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
StringRef ArchiveName,
StringRef ArchitectureName) {
assert(E);
outs().flush();
WithColor::error(errs(), ToolName);
if (ArchiveName != "")
errs() << ArchiveName << "(" << FileName << ")";
else
errs() << "'" << FileName << "'";
if (!ArchitectureName.empty())
errs() << " (for architecture " << ArchitectureName << ")";
errs() << ": ";
logAllUnhandledErrors(std::move(E), errs());
exit(1);
}
static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
static void warnOnNoMatchForSections() {
SetVector<StringRef> MissingSections;
for (StringRef S : FilterSections) {
if (FoundSectionSet.count(S))
return;
// The user may specify an unnamed section. Don't warn for it.
if (!S.empty())
MissingSections.insert(S);
}
// Warn only if no section in FilterSections is matched.
for (StringRef S : MissingSections)
reportCmdLineWarning("section '" + S +
"' mentioned in a -j/--section option, but not "
"found in any input file");
}
static const Target *getTarget(const ObjectFile *Obj) {
// Figure out the target triple.
Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
TheTriple = Obj->makeTriple();
} else {
TheTriple.setTriple(Triple::normalize(TripleName));
auto Arch = Obj->getArch();
if (Arch == Triple::arm || Arch == Triple::armeb)
Obj->setARMSubArch(TheTriple);
}
// Get the target specific parser.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
Error);
if (!TheTarget)
reportError(Obj->getFileName(), "can't find target: " + Error);
// Update the triple name and return the found target.
TripleName = TheTriple.getTriple();
return TheTarget;
}
bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) {
return A.getOffset() < B.getOffset();
}
static Error getRelocationValueString(const RelocationRef &Rel,
SmallVectorImpl<char> &Result) {
const ObjectFile *Obj = Rel.getObject();
if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
return getELFRelocationValueString(ELF, Rel, Result);
if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
return getCOFFRelocationValueString(COFF, Rel, Result);
if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
return getWasmRelocationValueString(Wasm, Rel, Result);
if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
return getMachORelocationValueString(MachO, Rel, Result);
if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj))
return getXCOFFRelocationValueString(XCOFF, Rel, Result);
llvm_unreachable("unknown object file format");
}
/// Indicates whether this relocation should be hidden when listing
/// relocations, usually because it is the trailing part of a multipart
/// relocation that will be printed as part of the leading relocation.
static bool getHidden(RelocationRef RelRef) {
auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject());
if (!MachO)
return false;
unsigned Arch = MachO->getArch();
DataRefImpl Rel = RelRef.getRawDataRefImpl();
uint64_t Type = MachO->getRelocationType(Rel);
// On arches that use the generic relocations, GENERIC_RELOC_PAIR
// is always hidden.
if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc)
return Type == MachO::GENERIC_RELOC_PAIR;
if (Arch == Triple::x86_64) {
// On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
// an X86_64_RELOC_SUBTRACTOR.
if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
DataRefImpl RelPrev = Rel;
RelPrev.d.a--;
uint64_t PrevType = MachO->getRelocationType(RelPrev);
if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
return true;
}
}
return false;
}
namespace {
/// Get the column at which we want to start printing the instruction
/// disassembly, taking into account anything which appears to the left of it.
unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
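// Without raw bytes the mnemonic column is 16. With raw bytes shown, x86
// needs more room (its instructions can be up to 15 bytes long), so use 40
// there and 24 for other targets.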
return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24;
}
static bool isAArch64Elf(const ObjectFile *Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
}
static bool isArmElf(const ObjectFile *Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
return Elf && Elf->getEMachine() == ELF::EM_ARM;
}
static bool hasMappingSymbols(const ObjectFile *Obj) {
return isArmElf(Obj) || isAArch64Elf(Obj);
}
static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
const RelocationRef &Rel, uint64_t Address,
bool Is64Bits) {
StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";
SmallString<16> Name;
SmallString<32> Val;
Rel.getTypeName(Name);
if (Error E = getRelocationValueString(Rel, Val))
reportError(std::move(E), FileName);
OS << format(Fmt.data(), Address) << Name << "\t" << Val;
}
class PrettyPrinter {
public:
virtual ~PrettyPrinter() = default;
virtual void
printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
LVP.printBetweenInsts(OS, false);
size_t Start = OS.tell();
if (LeadingAddr)
OS << format("%8" PRIx64 ":", Address.Address);
if (ShowRawInsn) {
OS << ' ';
dumpBytes(Bytes, OS);
}
// The output of printInst starts with a tab. Print some spaces so that
// the tab has 1 column and advances to the target tab stop.
unsigned TabStop = getInstStartColumn(STI);
unsigned Column = OS.tell() - Start;
OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
if (MI) {
// See MCInstPrinter::printInst. On targets where a PC relative immediate
// is relative to the next instruction and the length of a MCInst is
// difficult to measure (x86), this is the address of the next
// instruction.
uint64_t Addr =
Address.Address + (STI.getTargetTriple().isX86() ? Bytes.size() : 0);
IP.printInst(MI, Addr, "", STI, OS);
} else
OS << "\t<unknown>";
}
};
PrettyPrinter PrettyPrinterInst;
class HexagonPrettyPrinter : public PrettyPrinter {
public:
void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
formatted_raw_ostream &OS) {
uint32_t opcode =
(Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0];
if (LeadingAddr)
OS << format("%8" PRIx64 ":", Address);
if (ShowRawInsn) {
OS << "\t";
dumpBytes(Bytes.slice(0, 4), OS);
OS << format("\t%08" PRIx32, opcode);
}
}
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
if (!MI) {
printLead(Bytes, Address.Address, OS);
OS << " <unknown>";
return;
}
std::string Buffer;
{
raw_string_ostream TempStream(Buffer);
IP.printInst(MI, Address.Address, "", STI, TempStream);
}
StringRef Contents(Buffer);
// Split off bundle attributes
auto PacketBundle = Contents.rsplit('\n');
// Split off first instruction from the rest
auto HeadTail = PacketBundle.first.split('\n');
auto Preamble = " { ";
auto Separator = "";
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
auto PrintReloc = [&]() -> void {
while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
if (RelCur->getOffset() == Address.Address) {
printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false);
return;
}
++RelCur;
}
};
while (!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP, "");
printLead(Bytes, Address.Address, OS);
OS << Preamble;
Preamble = " ";
StringRef Inst;
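// The Hexagon printer separates the two sub-instructions of a duplex with
// '\v'; print both halves on one line joined by "; ".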
auto Duplex = HeadTail.first.split('\v');
if (!Duplex.second.empty()) {
OS << Duplex.first;
OS << "; ";
Inst = Duplex.second;
}
else
Inst = HeadTail.first;
OS << Inst;
HeadTail = HeadTail.second.split('\n');
if (HeadTail.first.empty())
OS << " } " << PacketBundle.second;
PrintReloc();
Bytes = Bytes.slice(4);
Address.Address += 4;
}
}
};
HexagonPrettyPrinter HexagonPrettyPrinterInst;
class AMDGCNPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
if (MI) {
SmallString<40> InstStr;
raw_svector_ostream IS(InstStr);
IP.printInst(MI, Address.Address, "", STI, IS);
OS << left_justify(IS.str(), 60);
} else {
// An unrecognized encoding - this is probably data, so represent it
// using the .long directive, or the .byte directive if fewer than 4
// bytes remain.
if (Bytes.size() >= 4) {
OS << format("\t.long 0x%08" PRIx32 " ",
support::endian::read32<support::little>(Bytes.data()));
OS.indent(42);
} else {
OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
for (unsigned int i = 1; i < Bytes.size(); i++)
OS << format(", 0x%02" PRIx8, Bytes[i]);
OS.indent(55 - (6 * Bytes.size()));
}
}
OS << format("// %012" PRIX64 ":", Address.Address);
if (Bytes.size() >= 4) {
// D should be cast to uint32_t here as it is passed by format to
// snprintf as a vararg.
for (uint32_t D : makeArrayRef(
reinterpret_cast<const support::little32_t *>(Bytes.data()),
Bytes.size() / 4))
OS << format(" %08" PRIX32, D);
} else {
for (unsigned char B : Bytes)
OS << format(" %02" PRIX8, B);
}
if (!Annot.empty())
OS << " // " << Annot;
}
};
AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
class BPFPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
object::SectionedAddress Address, formatted_raw_ostream &OS,
StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
StringRef ObjectFilename, std::vector<RelocationRef> *Rels,
LiveVariablePrinter &LVP) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
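// BPF encodes instructions in 8-byte units, so print the leading address
// as an instruction index (byte offset / 8) rather than a byte offset.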
if (LeadingAddr)
OS << format("%8" PRId64 ":", Address.Address / 8);
if (ShowRawInsn) {
OS << "\t";
dumpBytes(Bytes, OS);
}
if (MI)
IP.printInst(MI, Address.Address, "", STI, OS);
else
OS << "\t<unknown>";
}
};
BPFPrettyPrinter BPFPrettyPrinterInst;
PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
switch(Triple.getArch()) {
default:
return PrettyPrinterInst;
case Triple::hexagon:
return HexagonPrettyPrinterInst;
case Triple::amdgcn:
return AMDGCNPrettyPrinterInst;
case Triple::bpfel:
case Triple::bpfeb:
return BPFPrettyPrinterInst;
}
}
}
static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()),
Obj->getFileName())
->getType();
llvm_unreachable("Unsupported binary format");
}
template <class ELFT> static void
addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
for (auto Symbol : Obj->getDynamicSymbolIterators()) {
uint8_t SymbolType = Symbol.getELFType();
if (SymbolType == ELF::STT_SECTION)
continue;
uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName());
// ELFSymbolRef::getAddress() returns the size instead of the value for
// common symbols, which is not desirable for disassembly output. Override it.
if (SymbolType == ELF::STT_COMMON)
Address = unwrapOrError(Obj->getSymbol(Symbol.getRawDataRefImpl()),
Obj->getFileName())
->st_value;
StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
if (Name.empty())
continue;
section_iterator SecI =
unwrapOrError(Symbol.getSection(), Obj->getFileName());
if (SecI == Obj->section_end())
continue;
AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
}
}
static void
addDynamicElfSymbols(const ObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
assert(Obj->isELF());
if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
addDynamicElfSymbols(Elf32LEObj, AllSymbols);
else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
addDynamicElfSymbols(Elf64LEObj, AllSymbols);
else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
addDynamicElfSymbols(Elf32BEObj, AllSymbols);
else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
addDynamicElfSymbols(Elf64BEObj, AllSymbols);
else
llvm_unreachable("Unsupported binary format");
}
static Optional<SectionRef> getWasmCodeSection(const WasmObjectFile *Obj) {
for (auto SecI : Obj->sections()) {
const WasmSection &Section = Obj->getWasmSection(SecI);
if (Section.Type == wasm::WASM_SEC_CODE)
return SecI;
}
return None;
}
static void
addMissingWasmCodeSymbols(const WasmObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
Optional<SectionRef> Section = getWasmCodeSection(Obj);
if (!Section)
return;
SectionSymbolsTy &Symbols = AllSymbols[*Section];
std::set<uint64_t> SymbolAddresses;
for (const auto &Sym : Symbols)
SymbolAddresses.insert(Sym.Addr);
for (const wasm::WasmFunction &Function : Obj->functions()) {
uint64_t Address = Function.CodeSectionOffset;
// Only add fallback symbols for functions not already present in the symbol
// table.
if (SymbolAddresses.count(Address))
continue;
// This function has no symbol, so it should have no SymbolName.
assert(Function.SymbolName.empty());
// We use DebugName for the name, though it may be empty if there is no
// "name" custom section, or that section is missing a name for this
// function.
StringRef Name = Function.DebugName;
Symbols.emplace_back(Address, Name, ELF::STT_NOTYPE);
}
}
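// Synthesize "<name>@plt" symbols for entries in the .plt section so that
// calls through the PLT show a meaningful target name in the disassembly.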
static void addPltEntries(const ObjectFile *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
StringSaver &Saver) {
Optional<SectionRef> Plt = None;
for (const SectionRef &Section : Obj->sections()) {
Expected<StringRef> SecNameOrErr = Section.getName();
if (!SecNameOrErr) {
consumeError(SecNameOrErr.takeError());
continue;
}
if (*SecNameOrErr == ".plt")
Plt = Section;
}
if (!Plt)
return;
if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj)) {
for (auto PltEntry : ElfObj->getPltAddresses()) {
if (PltEntry.first) {
SymbolRef Symbol(*PltEntry.first, ElfObj);
uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
if (Expected<StringRef> NameOrErr = Symbol.getName()) {
if (!NameOrErr->empty())
AllSymbols[*Plt].emplace_back(
PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
SymbolType);
continue;
} else {
// The warning has been reported in disassembleObject().
consumeError(NameOrErr.takeError());
}
}
reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
" references an invalid symbol",
Obj->getFileName());
}
}
}
// Normally the disassembly output will skip blocks of zeroes. This function
// returns the number of zero bytes that can be skipped when dumping the
// disassembly of the instructions in Buf.
static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
// Find the number of leading zeroes.
size_t N = 0;
while (N < Buf.size() && !Buf[N])
++N;
// Only skip a block of zero bytes if we see at least 8 of them in a row.
if (N < 8)
return 0;
// We skip zeroes in multiples of 4 because we do not want to truncate an
// instruction if it starts with a zero byte.
return N & ~0x3;
}
// Returns a map from sections to their relocations.
static std::map<SectionRef, std::vector<RelocationRef>>
getRelocsMap(object::ObjectFile const &Obj) {
std::map<SectionRef, std::vector<RelocationRef>> Ret;
uint64_t I = (uint64_t)-1;
for (SectionRef Sec : Obj.sections()) {
++I;
Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection();
if (!RelocatedOrErr)
reportError(Obj.getFileName(),
"section (" + Twine(I) +
"): failed to get a relocated section: " +
toString(RelocatedOrErr.takeError()));
section_iterator Relocated = *RelocatedOrErr;
if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep)
continue;
std::vector<RelocationRef> &V = Ret[*Relocated];
append_range(V, Sec.relocations());
// Sort relocations by address.
llvm::stable_sort(V, isRelocAddressLess);
}
return Ret;
}
// Used for --adjust-vma to check if address should be adjusted by the
// specified value for a given section.
// For ELF we do not adjust non-allocatable sections like debug ones,
// because they are not loadable.
// TODO: implement for other file formats.
static bool shouldAdjustVA(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (Obj->isELF())
return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
return false;
}
typedef std::pair<uint64_t, char> MappingSymbolPair;
static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
uint64_t Address) {
auto It =
partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) {
return Val.first <= Address;
});
// Return zero for any address before the first mapping symbol; this means
// we should use the default disassembly mode, depending on the target.
if (It == MappingSymbols.begin())
return '\x00';
return (It - 1)->second;
}
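// Print one data item at SectionAddr + Index as a .word/.short/.byte
// directive (the widest that fits before End) and return the number of
// bytes consumed.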
static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index,
uint64_t End, const ObjectFile *Obj,
ArrayRef<uint8_t> Bytes,
ArrayRef<MappingSymbolPair> MappingSymbols,
raw_ostream &OS) {
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
OS << format("%8" PRIx64 ":\t", SectionAddr + Index);
if (Index + 4 <= End) {
dumpBytes(Bytes.slice(Index, 4), OS);
OS << "\t.word\t"
<< format_hex(support::endian::read32(Bytes.data() + Index, Endian),
10);
return 4;
}
if (Index + 2 <= End) {
dumpBytes(Bytes.slice(Index, 2), OS);
OS << "\t\t.short\t"
<< format_hex(support::endian::read16(Bytes.data() + Index, Endian),
6);
return 2;
}
dumpBytes(Bytes.slice(Index, 1), OS);
OS << "\t\t.byte\t" << format_hex(Bytes[0], 4);
return 1;
}
static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
ArrayRef<uint8_t> Bytes) {
// Print out data up to 8 bytes at a time in hex and ASCII.
uint8_t AsciiData[9] = {'\0'};
uint8_t Byte;
int NumBytes = 0;
for (; Index < End; ++Index) {
if (NumBytes == 0)
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
Byte = Bytes.slice(Index)[0];
outs() << format(" %02x", Byte);
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
uint8_t IndentOffset = 0;
NumBytes++;
if (Index == End - 1 || NumBytes > 8) {
// Pad the line when fewer than 8 bytes were printed:
// 3 columns per missing byte (2 hex digits plus the separating space).
IndentOffset = 3 * (8 - NumBytes);
for (int Excess = NumBytes; Excess < 8; Excess++)
AsciiData[Excess] = '\0';
NumBytes = 8;
}
if (NumBytes == 8) {
AsciiData[8] = '\0';
outs() << std::string(IndentOffset, ' ') << " ";
outs() << reinterpret_cast<char *>(AsciiData);
outs() << '\n';
NumBytes = 0;
}
}
}
SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj,
const SymbolRef &Symbol) {
const StringRef FileName = Obj->getFileName();
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
if (Obj->isXCOFF() && SymbolDescription) {
const auto *XCOFFObj = cast<XCOFFObjectFile>(Obj);
DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl();
const uint32_t SymbolIndex = XCOFFObj->getSymbolIndex(SymbolDRI.p);
Optional<XCOFF::StorageMappingClass> Smc =
getXCOFFSymbolCsectSMC(XCOFFObj, Symbol);
return SymbolInfoTy(Addr, Name, Smc, SymbolIndex,
isLabel(XCOFFObj, Symbol));
} else
return SymbolInfoTy(Addr, Name,
Obj->isELF() ? getElfSymbolType(Obj, Symbol)
: (uint8_t)ELF::STT_NOTYPE);
}
static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj,
const uint64_t Addr, StringRef &Name,
uint8_t Type) {
if (Obj->isXCOFF() && SymbolDescription)
return SymbolInfoTy(Addr, Name, None, None, false);
else
return SymbolInfoTy(Addr, Name, Type);
}
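// Scan the symbol's byte range, evaluate every branch target, and assign a
// local label ("L0", "L1", ...) to each in-range target; these labels are
// printed later when --symbolize-operands is enabled.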
static void
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, const MCInstrAnalysis *MIA,
MCDisassembler *DisAsm, MCInstPrinter *IP,
const MCSubtargetInfo *STI, uint64_t SectionAddr,
uint64_t Start, uint64_t End,
std::unordered_map<uint64_t, std::string> &Labels) {
// So far only supports X86.
if (!STI->getTargetTriple().isX86())
return;
Labels.clear();
unsigned LabelCount = 0;
Start += SectionAddr;
End += SectionAddr;
uint64_t Index = Start;
while (Index < End) {
// Disassemble a real instruction and record function-local branch labels.
MCInst Inst;
uint64_t Size;
bool Disassembled = DisAsm->getInstruction(
Inst, Size, Bytes.slice(Index - SectionAddr), Index, nulls());
if (Size == 0)
Size = 1;
if (Disassembled && MIA) {
uint64_t Target;
bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
if (TargetKnown && (Target >= Start && Target < End) &&
!Labels.count(Target))
Labels[Target] = ("L" + Twine(LabelCount++)).str();
}
Index += Size;
}
}
// Create an MCSymbolizer for the target and add it to the MCDisassembler.
// This is currently only used on AMDGPU, and assumes the format of the
// void * argument passed to AMDGPU's createMCSymbolizer.
static void addSymbolizer(
MCContext &Ctx, const Target *Target, StringRef TripleName,
MCDisassembler *DisAsm, uint64_t SectionAddr, ArrayRef<uint8_t> Bytes,
SectionSymbolsTy &Symbols,
std::vector<std::unique_ptr<std::string>> &SynthesizedLabelNames) {
std::unique_ptr<MCRelocationInfo> RelInfo(
Target->createMCRelocationInfo(TripleName, Ctx));
if (!RelInfo)
return;
std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
MCSymbolizer *SymbolizerPtr = &*Symbolizer;
DisAsm->setSymbolizer(std::move(Symbolizer));
if (!SymbolizeOperands)
return;
// Synthesize labels referenced by branch instructions by
// disassembling, discarding the output, and collecting the referenced
// addresses from the symbolizer.
for (size_t Index = 0; Index != Bytes.size();) {
MCInst Inst;
uint64_t Size;
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), SectionAddr + Index,
nulls());
if (Size == 0)
Size = 1;
Index += Size;
}
ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses();
// Copy and sort to remove duplicates.
std::vector<uint64_t> LabelAddrs;
LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(),
LabelAddrsRef.end());
llvm::sort(LabelAddrs);
LabelAddrs.resize(std::unique(LabelAddrs.begin(), LabelAddrs.end()) -
LabelAddrs.begin());
// Add the labels.
for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) {
auto Name = std::make_unique<std::string>();
*Name = (Twine("L") + Twine(LabelNum)).str();
SynthesizedLabelNames.push_back(std::move(Name));
Symbols.push_back(SymbolInfoTy(
LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE));
}
llvm::stable_sort(Symbols);
// Recreate the symbolizer with the new symbols list.
RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx));
Symbolizer.reset(Target->createMCSymbolizer(
TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
}
static StringRef getSegmentName(const MachOObjectFile *MachO,
const SectionRef &Section) {
if (MachO) {
DataRefImpl DR = Section.getRawDataRefImpl();
StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
return SegmentName;
}
return "";
}
static void emitPostInstructionInfo(formatted_raw_ostream &FOS,
const MCAsmInfo &MAI,
const MCSubtargetInfo &STI,
StringRef Comments,
LiveVariablePrinter &LVP) {
do {
if (!Comments.empty()) {
// Emit a line of comments.
StringRef Comment;
std::tie(Comment, Comments) = Comments.split('\n');
// MAI.getCommentColumn() assumes that instructions are printed at the
// position of 8, while getInstStartColumn() returns the actual position.
unsigned CommentColumn =
MAI.getCommentColumn() - 8 + getInstStartColumn(STI);
FOS.PadToColumn(CommentColumn);
FOS << MAI.getCommentString() << ' ' << Comment;
}
LVP.printAfterInst(FOS);
FOS << '\n';
} while (!Comments.empty());
FOS.flush();
}
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
const MCSubtargetInfo *PrimarySTI,
const MCSubtargetInfo *SecondarySTI,
PrettyPrinter &PIP,
SourcePrinter &SP, bool InlineRelocs) {
const MCSubtargetInfo *STI = PrimarySTI;
MCDisassembler *DisAsm = PrimaryDisAsm;
bool PrimaryIsThumb = false;
if (isArmElf(Obj))
PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
if (InlineRelocs)
RelocMap = getRelocsMap(*Obj);
bool Is64Bits = Obj->getBytesInAddress() > 4;
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
SectionSymbolsTy AbsoluteSymbols;
const StringRef FileName = Obj->getFileName();
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SymbolRef &Symbol : Obj->symbols()) {
Expected<StringRef> NameOrErr = Symbol.getName();
if (!NameOrErr) {
reportWarning(toString(NameOrErr.takeError()), FileName);
continue;
}
if (NameOrErr->empty() && !(Obj->isXCOFF() && SymbolDescription))
continue;
if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION)
continue;
if (MachO) {
// __mh_(execute|dylib|dylinker|bundle|preload|object)_header are special
// symbols that support MachO header introspection. They do not bind to
// code locations and are irrelevant for disassembly.
if (NameOrErr->startswith("__mh_") && NameOrErr->endswith("_header"))
continue;
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// querying the symbol may fail while trying to load a section that does
// not exist.
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType = (MachO->is64Bit() ?
MachO->getSymbol64TableEntry(SymDRI).n_type:
MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
continue;
}
section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol));
else
AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol));
}
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
if (Obj->isWasm())
addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols);
BumpPtrAllocator A;
StringSaver Saver(A);
addPltEntries(Obj, AllSymbols, Saver);
// Create a mapping from virtual address to section. An empty section can
// share an address with a non-empty section. Sort such empty sections to
// be before same-addressed non-empty sections so that symbol lookups
// prefer the non-empty section.
std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
for (SectionRef Sec : Obj->sections())
SectionAddresses.emplace_back(Sec.getAddress(), Sec);
llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) {
if (LHS.first != RHS.first)
return LHS.first < RHS.first;
return LHS.second.getSize() < RHS.second.getSize();
});
// Linked executables (.exe and .dll files) typically don't include a real
// symbol table but they might contain an export table.
if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
for (const auto &ExportEntry : COFFObj->export_directories()) {
StringRef Name;
if (Error E = ExportEntry.getSymbolName(Name))
reportError(std::move(E), Obj->getFileName());
if (Name.empty())
continue;
uint32_t RVA;
if (Error E = ExportEntry.getExportRVA(RVA))
reportError(std::move(E), Obj->getFileName());
uint64_t VA = COFFObj->getImageBase() + RVA;
auto Sec = partition_point(
SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= VA;
});
if (Sec != SectionAddresses.begin()) {
--Sec;
AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
} else
AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
// Sort all the symbols; this allows us to use a simple binary search to
// find a symbol near an address. Multiple symbols can have the same
// address, so use a stable sort to stabilize the output.
StringSet<> FoundDisasmSymbolSet;
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
llvm::stable_sort(SecSyms.second);
llvm::stable_sort(AbsoluteSymbols);
std::unique_ptr<DWARFContext> DICtx;
LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI);
if (DbgVariables != DVDisabled) {
DICtx = DWARFContext::create(*Obj);
for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units())
LVP.addCompileUnit(CU->getUnitDIE(false));
}
LLVM_DEBUG(LVP.dump());
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (FilterSections.empty() && !DisassembleAll &&
(!Section.isText() || Section.isVirtual()))
continue;
uint64_t SectionAddr = Section.getAddress();
uint64_t SectSize = Section.getSize();
if (!SectSize)
continue;
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<MappingSymbolPair> MappingSymbols;
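// Collect ELF mapping symbols: $a marks ARM code, $t Thumb code, $x A64
// code and $d data; store them as offsets relative to the section start.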
if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = Symb.Addr;
StringRef Name = Symb.Name;
if (Name.startswith("$d"))
MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}
llvm::sort(MappingSymbols);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
unwrapOrError(Section.getContents(), Obj->getFileName()));
std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames;
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
addSymbolizer(Ctx, TheTarget, TripleName, DisAsm, SectionAddr, Bytes,
Symbols, SynthesizedLabelNames);
}
StringRef SegmentName = getSegmentName(MachO, Section);
StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName());
// If the section has no symbol at the start, just insert a dummy one.
if (Symbols.empty() || Symbols[0].Addr != 0) {
Symbols.insert(Symbols.begin(),
createDummySymbolInfo(Obj, SectionAddr, SectionName,
Section.isText() ? ELF::STT_FUNC
: ELF::STT_OBJECT));
}
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
uint64_t VMAAdjustment = 0;
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
+ // In executable and shared objects, r_offset holds a virtual address.
+ // Subtract SectionAddr from the r_offset field of a relocation to get
+ // the section offset.
+ uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
std::vector<RelocationRef> Rels = RelocMap[Section];
std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
std::string SymbolName = Symbols[SI].Name.str();
if (Demangle)
SymbolName = demangle(SymbolName);
// Skip if --disassemble-symbols is not empty and the symbol is not in
// the list.
if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName))
continue;
uint64_t Start = Symbols[SI].Addr;
if (Start < SectionAddr || StopAddress <= Start)
continue;
else
FoundDisasmSymbolSet.insert(SymbolName);
// The end is the section end, the beginning of the next symbol, or
// --stop-address.
uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
if (SI + 1 < SE)
End = std::min(End, Symbols[SI + 1].Addr);
if (Start >= End || End <= StartAddress)
continue;
Start -= SectionAddr;
End -= SectionAddr;
if (!PrintedSection) {
PrintedSection = true;
outs() << "\nDisassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << SectionName << ":\n";
}
outs() << '\n';
if (LeadingAddr)
outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
SectionAddr + Start + VMAAdjustment);
if (Obj->isXCOFF() && SymbolDescription) {
outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n";
} else
outs() << '<' << SymbolName << ">:\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
if (Section.isVirtual()) {
outs() << "...\n";
continue;
}
auto Status = DisAsm->onSymbolStart(Symbols[SI], Size,
Bytes.slice(Start, End - Start),
SectionAddr + Start, CommentStream);
// To have round trippable disassembly, we fall back to decoding the
// remaining bytes as instructions.
//
// If there is a failure, we disassemble the failed region as bytes before
// falling back. The target is expected to print nothing in this case.
//
// If there is Success or SoftFail, i.e. no 'real' failure, we advance by
// Size bytes before falling back.
// So if the entire symbol is 'eaten' by the target:
// Start += Size // Now Start = End and we will never decode it as
// // instructions
//
// Right now, most targets return None, i.e. they decline to handle the
// symbol specially. But WebAssembly decodes preludes for some symbols.
//
if (Status.hasValue()) {
if (Status.getValue() == MCDisassembler::Fail) {
outs() << "// Error in decoding " << SymbolName
<< " : Decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
}
}
} else {
Size = 0;
}
Start += Size;
Index = Start;
if (SectionAddr < StartAddress)
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
// If there is a data/common symbol inside an ELF text section and we are
// only disassembling text (applicable to all architectures), we are in a
// situation where we must print the data and not disassemble it.
if (Obj->isELF() && !DisassembleAll && Section.isText()) {
uint8_t SymTy = Symbols[SI].Type;
if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
dumpELFData(SectionAddr, Index, End, Bytes);
Index = End;
}
}
bool CheckARMELFData = hasMappingSymbols(Obj) &&
Symbols[SI].Type != ELF::STT_OBJECT &&
!DisassembleAll;
bool DumpARMELFData = false;
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
if (SymbolizeOperands)
collectLocalBranchTargets(Bytes, MIA, DisAsm, IP, PrimarySTI,
SectionAddr, Index, End, AllLabels);
while (Index < End) {
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the mapping symbols to understand what
// we need to dump. If the data marker is within a function, the data
// is printed as .word/.short/.byte directives.
if (CheckARMELFData) {
char Kind = getMappingSymbolKind(MappingSymbols, Index);
DumpARMELFData = Kind == 'd';
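// '$a' marks ARM code and '$t' marks Thumb code; pick the disassembler
// that matches the current mapping symbol.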
if (SecondarySTI) {
if (Kind == 'a') {
STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
} else if (Kind == 't') {
STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}
}
if (DumpARMELFData) {
Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
MappingSymbols, FOS);
} else {
// When -z or --disassemble-zeroes is given we always disassemble
// them. Otherwise we might want to skip zero bytes we see.
if (!DisassembleZeroes) {
uint64_t MaxOffset = End - Index;
// For --reloc: print zero blocks patched by relocations, so that
// relocations can be shown in the dump.
if (RelCur != RelEnd)
- MaxOffset = RelCur->getOffset() - Index;
+ MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
+ MaxOffset);
if (size_t N =
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
FOS << "\t\t..." << '\n';
Index += N;
continue;
}
}
// Print a local label if there is one.
auto Iter = AllLabels.find(SectionAddr + Index);
if (Iter != AllLabels.end())
FOS << "<" << Iter->second << ">:\n";
// Disassemble a real instruction, or data when --disassemble-all is
// provided.
MCInst Inst;
bool Disassembled =
DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, CommentStream);
if (Size == 0)
Size = 1;
LVP.update({Index, Section.getIndex()},
{Index + Size, Section.getIndex()}, Index + Size != End);
IP->setCommentStream(CommentStream);
PIP.printInst(
*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
{SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS,
"", *STI, &SP, Obj->getFileName(), &Rels, LVP);
IP->setCommentStream(llvm::nulls());
// If disassembly has failed, avoid analysing invalid/incomplete
// instruction information. Otherwise, try to resolve the target
// address (jump target or memory operand address) and print it on the
// right of the instruction.
if (Disassembled && MIA) {
// Branch targets are printed just after the instructions.
llvm::raw_ostream *TargetOS = &FOS;
uint64_t Target;
bool PrintTarget =
MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
if (!PrintTarget)
if (Optional<uint64_t> MaybeTarget =
MIA->evaluateMemoryOperandAddress(
Inst, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
// Do not print real address when symbolizing.
if (!SymbolizeOperands) {
// Memory operand addresses are printed as comments.
TargetOS = &CommentStream;
*TargetOS << "0x" << Twine::utohexstr(Target);
}
}
if (PrintTarget) {
// In a relocatable object, the target's section must reside in
// the same section as the call instruction or it is accessed
// through a relocation.
//
// In a non-relocatable object, the target may be in any section.
// In that case, locate the section(s) containing the target
// address and find the symbol in one of those, if possible.
//
// N.B. We don't walk the relocations in the relocatable case yet.
std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
if (!Obj->isRelocatableObject()) {
auto It = llvm::partition_point(
SectionAddresses,
[=](const std::pair<uint64_t, SectionRef> &O) {
return O.first <= Target;
});
uint64_t TargetSecAddr = 0;
while (It != SectionAddresses.begin()) {
--It;
if (TargetSecAddr == 0)
TargetSecAddr = It->first;
if (It->first != TargetSecAddr)
break;
TargetSectionSymbols.push_back(&AllSymbols[It->second]);
}
} else {
TargetSectionSymbols.push_back(&Symbols);
}
TargetSectionSymbols.push_back(&AbsoluteSymbols);
// Find the last symbol in the first candidate section whose
// offset is less than or equal to the target. If there are no
// such symbols, try in the next section and so on, before finally
// using the nearest preceding absolute symbol (if any), if there
// are no other valid symbols.
const SymbolInfoTy *TargetSym = nullptr;
for (const SectionSymbolsTy *TargetSymbols :
TargetSectionSymbols) {
auto It = llvm::partition_point(
*TargetSymbols,
[=](const SymbolInfoTy &O) { return O.Addr <= Target; });
if (It != TargetSymbols->begin()) {
TargetSym = &*(It - 1);
break;
}
}
// Print the label corresponding to the target, if there is one.
bool LabelAvailable = AllLabels.count(Target);
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
uint64_t Disp = Target - TargetAddress;
std::string TargetName = TargetSym->Name.str();
if (Demangle)
TargetName = demangle(TargetName);
*TargetOS << " <";
if (!Disp) {
// Always print the binary symbol precisely corresponding to
// the target address.
*TargetOS << TargetName;
} else if (!LabelAvailable) {
// Always print the binary symbol plus an offset if there's no
// local label corresponding to the target address.
*TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp);
} else {
*TargetOS << AllLabels[Target];
}
*TargetOS << ">";
} else if (LabelAvailable) {
*TargetOS << " <" << AllLabels[Target] << ">";
}
// By convention, each record in the comment stream should be
// terminated.
if (TargetOS == &CommentStream)
*TargetOS << "\n";
}
}
}
assert(Ctx.getAsmInfo());
emitPostInstructionInfo(FOS, *Ctx.getAsmInfo(), *STI,
CommentStream.str(), LVP);
Comments.clear();
// Hexagon handles relocations in its pretty printer.
if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction and data.
while (RelCur != RelEnd) {
- uint64_t Offset = RelCur->getOffset();
+ uint64_t Offset = RelCur->getOffset() - RelAdjustment;
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
continue;
}
// Stop when RelCur's offset is past the disassembled
// instruction/data. Note that it's possible the disassembled data
// is not the complete data: we might see the relocation printed in
// the middle of the data, but this matches the binutils objdump
// output.
if (Offset >= Index + Size)
break;
// When --adjust-vma is used, update the address printed.
if (RelCur->getSymbol() != Obj->symbol_end()) {
Expected<section_iterator> SymSI =
RelCur->getSymbol()->getSection();
if (SymSI && *SymSI != Obj->section_end() &&
shouldAdjustVA(**SymSI))
Offset += AdjustVMA;
}
printRelocation(FOS, Obj->getFileName(), *RelCur,
SectionAddr + Offset, Is64Bits);
LVP.printAfterOtherLine(FOS, true);
++RelCur;
}
}
Index += Size;
}
}
}
StringSet<> MissingDisasmSymbolSet =
set_difference(DisasmSymbolSet, FoundDisasmSymbolSet);
for (StringRef Sym : MissingDisasmSymbolSet.keys())
reportWarning("failed to disassemble missing symbol " + Sym, FileName);
}
static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
const Target *TheTarget = getTarget(Obj);
// Package up features to be passed to target/subtarget
SubtargetFeatures Features = Obj->getFeatures();
if (!MAttrs.empty())
for (unsigned I = 0; I != MAttrs.size(); ++I)
Features.AddFeature(MAttrs[I]);
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
reportError(Obj->getFileName(),
"no register info for target " + TripleName);
// Set up disassembler.
MCTargetOptions MCOptions;
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!AsmInfo)
reportError(Obj->getFileName(),
"no assembly info for target " + TripleName);
if (MCPU.empty())
MCPU = Obj->tryGetCPUName().getValueOr("").str();
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
reportError(Obj->getFileName(),
"no subtarget info for target " + TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
reportError(Obj->getFileName(),
"no instruction info for target " + TripleName);
MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
// FIXME: for now initialize MCObjectFileInfo with default values
std::unique_ptr<MCObjectFileInfo> MOFI(
TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
Ctx.setObjectFileInfo(MOFI.get());
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
reportError(Obj->getFileName(), "no disassembler for target " + TripleName);
// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode and Thumb mode.
// We use mapping symbols to switch between the two disassemblers, where
// appropriate.
std::unique_ptr<MCDisassembler> SecondaryDisAsm;
std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
if (STI->checkFeatures("+thumb-mode"))
Features.AddFeature("-thumb-mode");
else
Features.AddFeature("+thumb-mode");
SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
Features.getString()));
SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
reportError(Obj->getFileName(),
"no instruction printer for target " + TripleName);
IP->setPrintImmHex(PrintImmHex);
IP->setPrintBranchImmAsAddress(true);
IP->setSymbolizeOperands(SymbolizeOperands);
IP->setMCInstrAnalysis(MIA.get());
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
SourcePrinter SP(Obj, TheTarget->getName());
for (StringRef Opt : DisassemblerOptions)
if (!IP->applyTargetSpecificCLOption(Opt))
reportError(Obj->getFileName(),
"Unrecognized disassembler option: " + Opt);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
SP, InlineRelocs);
}
void objdump::printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
// files.
if (!Obj->isRelocatableObject())
return;
// Build a mapping from relocation target to a vector of relocation
// sections. Usually, there is only one relocation section for
// each relocated section.
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
uint64_t Ndx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
if (!SecOrErr)
reportError(Obj->getFileName(),
"section (" + Twine(Ndx) +
"): unable to get a relocation target: " +
toString(SecOrErr.takeError()));
SecToRelSec[**SecOrErr].push_back(Section);
}
for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
outs() << "\nRELOCATION RECORDS FOR [" << SecName << "]:\n";
uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
uint32_t TypePadding = 24;
outs() << left_justify("OFFSET", OffsetPadding) << " "
<< left_justify("TYPE", TypePadding) << " "
<< "VALUE\n";
for (SectionRef Section : P.second) {
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
continue;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " "
<< left_justify(RelocName, TypePadding) << " " << ValueStr
<< "\n";
}
}
}
}
void objdump::printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
if (!Elf || Elf->getEType() != ELF::ET_DYN) {
reportError(Obj->getFileName(), "not a dynamic object");
return;
}
std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
if (DynRelSec.empty())
return;
outs() << "DYNAMIC RELOCATION RECORDS\n";
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
SmallString<32> ValueStr;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
}
// Returns true if we need to show the LMA column when dumping section
// headers. We show it only when the platform is ELF and either at least one
// section's VMA differs from its LMA or the --show-lma flag is used.
static bool shouldDisplayLMA(const ObjectFile *Obj) {
if (!Obj->isELF())
return false;
for (const SectionRef &S : ToolSectionFilter(*Obj))
if (S.getAddress() != getELFSectionLMA(S))
return true;
return ShowLMA;
}
static size_t getMaxSectionNameWidth(const ObjectFile *Obj) {
// Default column width for names is 13 even if no names are that long.
size_t MaxWidth = 13;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
MaxWidth = std::max(MaxWidth, Name.size());
}
return MaxWidth;
}
void objdump::printSectionHeaders(const ObjectFile *Obj) {
size_t NameWidth = getMaxSectionNameWidth(Obj);
size_t AddressWidth = 2 * Obj->getBytesInAddress();
bool HasLMAColumn = shouldDisplayLMA(Obj);
outs() << "\nSections:\n";
if (HasLMAColumn)
outs() << "Idx " << left_justify("Name", NameWidth) << " Size "
<< left_justify("VMA", AddressWidth) << " "
<< left_justify("LMA", AddressWidth) << " Type\n";
else
outs() << "Idx " << left_justify("Name", NameWidth) << " Size "
<< left_justify("VMA", AddressWidth) << " Type\n";
uint64_t Idx;
for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t VMA = Section.getAddress();
if (shouldAdjustVA(Section))
VMA += AdjustVMA;
uint64_t Size = Section.getSize();
std::string Type = Section.isText() ? "TEXT" : "";
if (Section.isData())
Type += Type.empty() ? "DATA" : ", DATA";
if (Section.isBSS())
Type += Type.empty() ? "BSS" : ", BSS";
if (Section.isDebugSection())
Type += Type.empty() ? "DEBUG" : ", DEBUG";
if (HasLMAColumn)
outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
Name.str().c_str(), Size)
<< format_hex_no_prefix(VMA, AddressWidth) << " "
<< format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth)
<< " " << Type << "\n";
else
outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth,
Name.str().c_str(), Size)
<< format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n";
}
}
void objdump::printSectionContents(const ObjectFile *Obj) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj);
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName());
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if (!Size)
continue;
outs() << "Contents of section ";
StringRef SegmentName = getSegmentName(MachO, Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << Name << ":\n";
if (Section.isBSS()) {
outs() << format("<skipping contents of bss section at [%04" PRIx64
", %04" PRIx64 ")>\n",
BaseAddr, BaseAddr + Size);
continue;
}
StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName());
// Dump out the content as hex and printable ascii characters.
for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
outs() << format(" %04" PRIx64 " ", BaseAddr + Addr);
// Dump line of hex.
for (std::size_t I = 0; I < 16; ++I) {
if (I != 0 && I % 4 == 0)
outs() << ' ';
if (Addr + I < End)
outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true)
<< hexdigit(Contents[Addr + I] & 0xF, true);
else
outs() << " ";
}
// Print ascii.
outs() << " ";
for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) {
if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF))
outs() << Contents[Addr + I];
else
outs() << ".";
}
outs() << "\n";
}
}
}
void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
if (O->isCOFF() && !DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
printCOFFSymbolTable(cast<const COFFObjectFile>(O));
return;
}
const StringRef FileName = O->getFileName();
if (!DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
return;
}
outs() << "\nDYNAMIC SYMBOL TABLE:\n";
if (!O->isELF()) {
reportWarning(
"this operation is not currently supported for this file format",
FileName);
return;
}
const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O);
for (auto I = ELF->getDynamicSymbolIterators().begin();
I != ELF->getDynamicSymbolIterators().end(); ++I)
printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
}
void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
ArchitectureName);
if ((Address < StartAddress) || (Address > StopAddress))
return;
SymbolRef::Type Type =
unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName);
uint32_t Flags =
unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName);
// Don't ask a Mach-O STAB symbol for its section unless you know that
// STAB symbol's section field refers to a valid section index. Otherwise
// querying the symbol may fail while trying to load a section that does
// not exist.
bool IsSTAB = false;
if (MachO) {
DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
uint8_t NType =
(MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type
: MachO->getSymbolTableEntry(SymDRI).n_type);
if (NType & MachO::N_STAB)
IsSTAB = true;
}
section_iterator Section = IsSTAB
? O->section_end()
: unwrapOrError(Symbol.getSection(), FileName,
ArchiveName, ArchitectureName);
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
if (Expected<StringRef> NameOrErr = Section->getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
} else {
Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName,
ArchitectureName);
}
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
bool Absolute = Flags & SymbolRef::SF_Absolute;
bool Common = Flags & SymbolRef::SF_Common;
bool Hidden = Flags & SymbolRef::SF_Hidden;
char GlobLoc = ' ';
if ((Section != O->section_end() || Absolute) && !Weak)
GlobLoc = Global ? 'g' : 'l';
char IFunc = ' ';
if (O->isELF()) {
if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC)
IFunc = 'i';
if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE)
GlobLoc = 'u';
}
char Debug = ' ';
if (DumpDynamic)
Debug = 'D';
else if (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File)
Debug = 'd';
char FileFunc = ' ';
if (Type == SymbolRef::ST_File)
FileFunc = 'f';
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
else if (Type == SymbolRef::ST_Data)
FileFunc = 'O';
const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
outs() << format(Fmt, Address) << " "
<< GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
<< (Weak ? 'w' : ' ') // Weak?
<< ' ' // Constructor. Not supported yet.
<< ' ' // Warning. Not supported yet.
<< IFunc // Indirect reference to another symbol.
<< Debug // Debugging (d) or dynamic (D) symbol.
<< FileFunc // Name of function (F), file (f) or object (O).
<< ' ';
if (Absolute) {
outs() << "*ABS*";
} else if (Common) {
outs() << "*COM*";
} else if (Section == O->section_end()) {
outs() << "*UND*";
} else {
StringRef SegmentName = getSegmentName(MachO, *Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
StringRef SectionName = unwrapOrError(Section->getName(), FileName);
outs() << SectionName;
}
if (Common || O->isELF()) {
uint64_t Val =
Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
outs() << '\t' << format(Fmt, Val);
}
if (O->isELF()) {
uint8_t Other = ELFSymbolRef(Symbol).getOther();
switch (Other) {
case ELF::STV_DEFAULT:
break;
case ELF::STV_INTERNAL:
outs() << " .internal";
break;
case ELF::STV_HIDDEN:
outs() << " .hidden";
break;
case ELF::STV_PROTECTED:
outs() << " .protected";
break;
default:
outs() << format(" 0x%02x", Other);
break;
}
} else if (Hidden) {
outs() << " .hidden";
}
if (Demangle)
outs() << ' ' << demangle(std::string(Name)) << '\n';
else
outs() << ' ' << Name << '\n';
}
static void printUnwindInfo(const ObjectFile *O) {
outs() << "Unwind info:\n\n";
if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O))
printCOFFUnwindInfo(Coff);
else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O))
printMachOUnwindInfo(MachO);
else
// TODO: Extract DWARF dump tool to objdump.
WithColor::error(errs(), ToolName)
<< "This operation is only currently supported "
"for COFF and MachO object files.\n";
}
/// Dump the raw contents of the __clangast section so the output can be piped
/// into llvm-bcanalyzer.
static void printRawClangAST(const ObjectFile *Obj) {
if (outs().is_displayed()) {
WithColor::error(errs(), ToolName)
<< "The -raw-clang-ast option will dump the raw binary contents of "
"the clang ast section.\n"
"Please redirect the output to a file or another program such as "
"llvm-bcanalyzer.\n";
return;
}
StringRef ClangASTSectionName("__clangast");
if (Obj->isCOFF()) {
ClangASTSectionName = "clangast";
}
Optional<object::SectionRef> ClangASTSection;
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
if (Expected<StringRef> NameOrErr = Sec.getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
if (Name == ClangASTSectionName) {
ClangASTSection = Sec;
break;
}
}
if (!ClangASTSection)
return;
StringRef ClangASTContents = unwrapOrError(
ClangASTSection.getValue().getContents(), Obj->getFileName());
outs().write(ClangASTContents.data(), ClangASTContents.size());
}
static void printFaultMaps(const ObjectFile *Obj) {
StringRef FaultMapSectionName;
if (Obj->isELF()) {
FaultMapSectionName = ".llvm_faultmaps";
} else if (Obj->isMachO()) {
FaultMapSectionName = "__llvm_faultmaps";
} else {
WithColor::error(errs(), ToolName)
<< "This operation is only currently supported "
"for ELF and Mach-O executable files.\n";
return;
}
Optional<object::SectionRef> FaultMapSection;
for (auto Sec : ToolSectionFilter(*Obj)) {
StringRef Name;
if (Expected<StringRef> NameOrErr = Sec.getName())
Name = *NameOrErr;
else
consumeError(NameOrErr.takeError());
if (Name == FaultMapSectionName) {
FaultMapSection = Sec;
break;
}
}
outs() << "FaultMap table:\n";
if (!FaultMapSection.hasValue()) {
outs() << "<not found>\n";
return;
}
StringRef FaultMapContents =
unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName());
FaultMapParser FMP(FaultMapContents.bytes_begin(),
FaultMapContents.bytes_end());
outs() << FMP;
}
static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
if (O->isELF()) {
printELFFileHeader(O);
printELFDynamicSection(O);
printELFSymbolVersionInfo(O);
return;
}
if (O->isCOFF())
return printCOFFFileHeader(O);
if (O->isWasm())
return printWasmFileHeader(O);
if (O->isMachO()) {
printMachOFileHeader(O);
if (!OnlyFirst)
printMachOLoadCommands(O);
return;
}
reportError(O->getFileName(), "Invalid/Unsupported object file format");
}
static void printFileHeaders(const ObjectFile *O) {
if (!O->isELF() && !O->isCOFF())
reportError(O->getFileName(), "Invalid/Unsupported object file format");
Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName());
StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
outs() << "start address: "
<< "0x" << format(Fmt.data(), Address) << "\n";
}
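// Example of the output produced above (values are assumed):
//   architecture: x86_64
//   start address: 0x0000000000401000
// The 16-digit form is used for 64-bit objects (getBytesInAddress() > 4);
// 32-bit objects get the 8-digit "%08" PRIx64 format instead.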
static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
if (!ModeOrErr) {
WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n";
consumeError(ModeOrErr.takeError());
return;
}
sys::fs::perms Mode = ModeOrErr.get();
outs() << ((Mode & sys::fs::owner_read) ? "r" : "-");
outs() << ((Mode & sys::fs::owner_write) ? "w" : "-");
outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-");
outs() << ((Mode & sys::fs::group_read) ? "r" : "-");
outs() << ((Mode & sys::fs::group_write) ? "w" : "-");
outs() << ((Mode & sys::fs::group_exe) ? "x" : "-");
outs() << ((Mode & sys::fs::others_read) ? "r" : "-");
outs() << ((Mode & sys::fs::others_write) ? "w" : "-");
outs() << ((Mode & sys::fs::others_exe) ? "x" : "-");
outs() << " ";
outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename),
unwrapOrError(C.getGID(), Filename),
unwrapOrError(C.getRawSize(), Filename));
StringRef RawLastModified = C.getRawLastModified();
unsigned Seconds;
if (RawLastModified.getAsInteger(10, Seconds))
outs() << "(date: \"" << RawLastModified
<< "\" contains non-decimal chars) ";
else {
// Since ctime(3) returns a 26-character string of the form:
// "Sun Sep 16 01:03:52 1973\n\0"
// just print 24 characters.
time_t t = Seconds;
outs() << format("%.24s ", ctime(&t));
}
StringRef Name = "";
Expected<StringRef> NameOrErr = C.getName();
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
Name = unwrapOrError(C.getRawName(), Filename);
} else {
Name = NameOrErr.get();
}
outs() << Name << "\n";
}
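// Example of an archive-header line as printed above (field values are
// assumed, not real output):
//   rw-r--r-- 1000/1000   2864 Sun Sep 16 01:03:52 1973 foo.o
// i.e. rwx permission bits, UID/GID, the raw size right-aligned in six
// columns, the 24-character ctime() date, then the member name.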
// For ELF only now.
static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) {
if (const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) {
if (Elf->getEType() != ELF::ET_REL)
return true;
}
return false;
}
static void checkForInvalidStartStopAddress(ObjectFile *Obj,
uint64_t Start, uint64_t Stop) {
if (!shouldWarnForInvalidStartStopAddress(Obj))
return;
for (const SectionRef &Section : Obj->sections())
if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) {
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
if ((Start < BaseAddr + Size) && Stop > BaseAddr)
return;
}
if (!HasStartAddressFlag)
reportWarning("no section has address less than 0x" +
Twine::utohexstr(Stop) + " specified by --stop-address",
Obj->getFileName());
else if (!HasStopAddressFlag)
reportWarning("no section has address greater than or equal to 0x" +
Twine::utohexstr(Start) + " specified by --start-address",
Obj->getFileName());
else
reportWarning("no section overlaps the range [0x" +
Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) +
") specified by --start-address/--stop-address",
Obj->getFileName());
}
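// A minimal sketch (hypothetical helper, not part of the tool) of the
// half-open overlap test used above: a section [BaseAddr, BaseAddr + Size)
// overlaps the requested range [Start, Stop) exactly when each interval
// begins before the other one ends.
static bool rangesOverlap(uint64_t Start, uint64_t Stop, uint64_t BaseAddr,
uint64_t Size) {
return Start < BaseAddr + Size && Stop > BaseAddr;
}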
static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
const Archive::Child *C = nullptr) {
// Avoid other output when using a raw option.
if (!RawClangAST) {
outs() << '\n';
if (A)
outs() << A->getFileName() << "(" << O->getFileName() << ")";
else
outs() << O->getFileName();
outs() << ":\tfile format " << O->getFileFormatName().lower() << "\n";
}
if (HasStartAddressFlag || HasStopAddressFlag)
checkForInvalidStartStopAddress(O, StartAddress, StopAddress);
// Note: the order here matches GNU objdump for compatibility.
StringRef ArchiveName = A ? A->getFileName() : "";
if (ArchiveHeaders && !MachOOpt && C)
printArchiveChild(ArchiveName, *C);
if (FileHeaders)
printFileHeaders(O);
if (PrivateHeaders || FirstPrivateHeader)
printPrivateFileHeaders(O, FirstPrivateHeader);
if (SectionHeaders)
printSectionHeaders(O);
if (SymbolTable)
printSymbolTable(O, ArchiveName);
if (DynamicSymbolTable)
printSymbolTable(O, ArchiveName, /*ArchitectureName=*/"",
/*DumpDynamic=*/true);
if (DwarfDumpType != DIDT_Null) {
std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
// Dump the complete DWARF structure.
DIDumpOptions DumpOpts;
DumpOpts.DumpType = DwarfDumpType;
DICtx->dump(outs(), DumpOpts);
}
if (Relocations && !Disassemble)
printRelocations(O);
if (DynamicRelocations)
printDynamicRelocations(O);
if (SectionContents)
printSectionContents(O);
if (Disassemble)
disassembleObject(O, Relocations);
if (UnwindInfo)
printUnwindInfo(O);
// Mach-O specific options:
if (ExportsTrie)
printExportsTrie(O);
if (Rebase)
printRebaseTable(O);
if (Bind)
printBindTable(O);
if (LazyBind)
printLazyBindTable(O);
if (WeakBind)
printWeakBindTable(O);
// Other special sections:
if (RawClangAST)
printRawClangAST(O);
if (FaultMapSection)
printFaultMaps(O);
}
static void dumpObject(const COFFImportFile *I, const Archive *A,
const Archive::Child *C = nullptr) {
StringRef ArchiveName = A ? A->getFileName() : "";
// Avoid other output when using a raw option.
if (!RawClangAST)
outs() << '\n'
<< ArchiveName << "(" << I->getFileName() << ")"
<< ":\tfile format COFF-import-file"
<< "\n\n";
if (ArchiveHeaders && !MachOOpt && C)
printArchiveChild(ArchiveName, *C);
if (SymbolTable)
printCOFFSymbolTable(I);
}
/// Dump each object file in \a A.
static void dumpArchive(const Archive *A) {
Error Err = Error::success();
unsigned I = -1;
for (auto &C : A->children(Err)) {
++I;
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
reportError(std::move(E), getFileNameForError(C, I), A->getFileName());
continue;
}
if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
dumpObject(O, A, &C);
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpObject(I, A, &C);
else
reportError(errorCodeToError(object_error::invalid_file_type),
A->getFileName());
}
if (Err)
reportError(std::move(Err), A->getFileName());
}
/// Open file and figure out how to dump it.
static void dumpInput(StringRef file) {
// If we are using the Mach-O specific object file parser, then let it parse
// the file and process the command line options. So the -arch flags can
// be used to select specific slices, etc.
if (MachOOpt) {
parseInputMachO(file);
return;
}
// Attempt to open the binary.
OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file);
Binary &Binary = *OBinary.getBinary();
if (Archive *A = dyn_cast<Archive>(&Binary))
dumpArchive(A);
else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
dumpObject(O);
else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
parseInputMachO(UB);
else
reportError(errorCodeToError(object_error::invalid_file_type), file);
}
template <typename T>
static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
T &Value) {
if (const opt::Arg *A = InputArgs.getLastArg(ID)) {
StringRef V(A->getValue());
if (!llvm::to_integer(V, Value, 0)) {
reportCmdLineError(A->getSpelling() +
": expected a non-negative integer, but got '" + V +
"'");
}
}
}
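// Note: to_integer() is called with radix 0, so the standard prefixes are
// auto-detected; e.g. (assumed invocations) --start-address=0x400000 and
// --start-address=4194304 parse to the same value.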
static std::vector<std::string>
commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) {
std::vector<std::string> Values;
for (StringRef Value : InputArgs.getAllArgValues(ID)) {
llvm::SmallVector<StringRef, 2> SplitValues;
llvm::SplitString(Value, SplitValues, ",");
for (StringRef SplitValue : SplitValues)
Values.push_back(SplitValue.str());
}
return Values;
}
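// Illustrative expansion (the options shown are assumed inputs): values from
// repeated occurrences are flattened after comma-splitting, so
//   --disassemble-symbols=main,foo --disassemble-symbols=bar
// produces {"main", "foo", "bar"}.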
static void parseOtoolOptions(const llvm::opt::InputArgList &InputArgs) {
MachOOpt = true;
FullLeadingAddr = true;
PrintImmHex = true;
ArchName = InputArgs.getLastArgValue(OTOOL_arch).str();
LinkOptHints = InputArgs.hasArg(OTOOL_C);
if (InputArgs.hasArg(OTOOL_d))
FilterSections.push_back("__DATA,__data");
DylibId = InputArgs.hasArg(OTOOL_D);
UniversalHeaders = InputArgs.hasArg(OTOOL_f);
DataInCode = InputArgs.hasArg(OTOOL_G);
FirstPrivateHeader = InputArgs.hasArg(OTOOL_h);
IndirectSymbols = InputArgs.hasArg(OTOOL_I);
ShowRawInsn = InputArgs.hasArg(OTOOL_j);
PrivateHeaders = InputArgs.hasArg(OTOOL_l);
DylibsUsed = InputArgs.hasArg(OTOOL_L);
MCPU = InputArgs.getLastArgValue(OTOOL_mcpu_EQ).str();
ObjcMetaData = InputArgs.hasArg(OTOOL_o);
DisSymName = InputArgs.getLastArgValue(OTOOL_p).str();
InfoPlist = InputArgs.hasArg(OTOOL_P);
Relocations = InputArgs.hasArg(OTOOL_r);
if (const Arg *A = InputArgs.getLastArg(OTOOL_s)) {
auto Filter = (A->getValue(0) + StringRef(",") + A->getValue(1)).str();
FilterSections.push_back(Filter);
}
if (InputArgs.hasArg(OTOOL_t))
FilterSections.push_back("__TEXT,__text");
Verbose = InputArgs.hasArg(OTOOL_v) || InputArgs.hasArg(OTOOL_V) ||
InputArgs.hasArg(OTOOL_o);
SymbolicOperands = InputArgs.hasArg(OTOOL_V);
if (InputArgs.hasArg(OTOOL_x))
FilterSections.push_back(",__text");
LeadingAddr = LeadingHeaders = !InputArgs.hasArg(OTOOL_X);
InputFilenames = InputArgs.getAllArgValues(OTOOL_INPUT);
if (InputFilenames.empty())
reportCmdLineError("no input file");
for (const Arg *A : InputArgs) {
const Option &O = A->getOption();
if (O.getGroup().isValid() && O.getGroup().getID() == OTOOL_grp_obsolete) {
reportCmdLineWarning(O.getPrefixedName() +
" is obsolete and not implemented");
}
}
}
static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
parseIntArg(InputArgs, OBJDUMP_adjust_vma_EQ, AdjustVMA);
AllHeaders = InputArgs.hasArg(OBJDUMP_all_headers);
ArchName = InputArgs.getLastArgValue(OBJDUMP_arch_name_EQ).str();
ArchiveHeaders = InputArgs.hasArg(OBJDUMP_archive_headers);
Demangle = InputArgs.hasArg(OBJDUMP_demangle);
Disassemble = InputArgs.hasArg(OBJDUMP_disassemble);
DisassembleAll = InputArgs.hasArg(OBJDUMP_disassemble_all);
SymbolDescription = InputArgs.hasArg(OBJDUMP_symbol_description);
DisassembleSymbols =
commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) {
DwarfDumpType =
StringSwitch<DIDumpType>(A->getValue()).Case("frames", DIDT_DebugFrame);
}
DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers);
SectionContents = InputArgs.hasArg(OBJDUMP_full_contents);
PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers);
InputFilenames = InputArgs.getAllArgValues(OBJDUMP_INPUT);
MachOOpt = InputArgs.hasArg(OBJDUMP_macho);
MCPU = InputArgs.getLastArgValue(OBJDUMP_mcpu_EQ).str();
MAttrs = commaSeparatedValues(InputArgs, OBJDUMP_mattr_EQ);
ShowRawInsn = !InputArgs.hasArg(OBJDUMP_no_show_raw_insn);
LeadingAddr = !InputArgs.hasArg(OBJDUMP_no_leading_addr);
RawClangAST = InputArgs.hasArg(OBJDUMP_raw_clang_ast);
Relocations = InputArgs.hasArg(OBJDUMP_reloc);
PrintImmHex =
InputArgs.hasFlag(OBJDUMP_print_imm_hex, OBJDUMP_no_print_imm_hex, false);
PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers);
FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ);
SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers);
ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma);
PrintSource = InputArgs.hasArg(OBJDUMP_source);
parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress);
HasStartAddressFlag = InputArgs.hasArg(OBJDUMP_start_address_EQ);
parseIntArg(InputArgs, OBJDUMP_stop_address_EQ, StopAddress);
HasStopAddressFlag = InputArgs.hasArg(OBJDUMP_stop_address_EQ);
SymbolTable = InputArgs.hasArg(OBJDUMP_syms);
SymbolizeOperands = InputArgs.hasArg(OBJDUMP_symbolize_operands);
DynamicSymbolTable = InputArgs.hasArg(OBJDUMP_dynamic_syms);
TripleName = InputArgs.getLastArgValue(OBJDUMP_triple_EQ).str();
UnwindInfo = InputArgs.hasArg(OBJDUMP_unwind_info);
Wide = InputArgs.hasArg(OBJDUMP_wide);
Prefix = InputArgs.getLastArgValue(OBJDUMP_prefix).str();
parseIntArg(InputArgs, OBJDUMP_prefix_strip, PrefixStrip);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) {
DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue())
.Case("ascii", DVASCII)
.Case("unicode", DVUnicode);
}
parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent);
parseMachOOptions(InputArgs);
// Parse -M (--disassembler-options) and deprecated
// --x86-asm-syntax={att,intel}.
//
// Note, for x86, the asm dialect (AssemblerDialect) is initialized when the
// MCAsmInfo is constructed. MCInstPrinter::applyTargetSpecificCLOption is
// called too late. For now we have to use the internal cl::opt option.
const char *AsmSyntax = nullptr;
for (const auto *A : InputArgs.filtered(OBJDUMP_disassembler_options_EQ,
OBJDUMP_x86_asm_syntax_att,
OBJDUMP_x86_asm_syntax_intel)) {
switch (A->getOption().getID()) {
case OBJDUMP_x86_asm_syntax_att:
AsmSyntax = "--x86-asm-syntax=att";
continue;
case OBJDUMP_x86_asm_syntax_intel:
AsmSyntax = "--x86-asm-syntax=intel";
continue;
}
SmallVector<StringRef, 2> Values;
llvm::SplitString(A->getValue(), Values, ",");
for (StringRef V : Values) {
if (V == "att")
AsmSyntax = "--x86-asm-syntax=att";
else if (V == "intel")
AsmSyntax = "--x86-asm-syntax=intel";
else
DisassemblerOptions.push_back(V.str());
}
}
if (AsmSyntax) {
const char *Argv[] = {"llvm-objdump", AsmSyntax};
llvm::cl::ParseCommandLineOptions(2, Argv);
}
// objdump defaults to a.out if no filenames are specified.
if (InputFilenames.empty())
InputFilenames.push_back("a.out");
}
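// Sketch of the -M handling above (command lines are assumed examples):
//   llvm-objdump -d -M intel a.out    selects "--x86-asm-syntax=intel"
//   llvm-objdump -d -M att,foo a.out  selects "--x86-asm-syntax=att" and
//                                     pushes "foo" into DisassemblerOptions,
// where "foo" stands for any target-specific disassembler option.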
int main(int argc, char **argv) {
using namespace llvm;
InitLLVM X(argc, argv);
ToolName = argv[0];
std::unique_ptr<CommonOptTable> T;
OptSpecifier Unknown, HelpFlag, HelpHiddenFlag, VersionFlag;
StringRef Stem = sys::path::stem(ToolName);
auto Is = [=](StringRef Tool) {
// We need to recognize the following filenames:
//
// llvm-objdump -> objdump
// llvm-otool-10.exe -> otool
// powerpc64-unknown-freebsd13-objdump -> objdump
auto I = Stem.rfind_insensitive(Tool);
return I != StringRef::npos &&
(I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()]));
};
if (Is("otool")) {
T = std::make_unique<OtoolOptTable>();
Unknown = OTOOL_UNKNOWN;
HelpFlag = OTOOL_help;
HelpHiddenFlag = OTOOL_help_hidden;
VersionFlag = OTOOL_version;
} else {
T = std::make_unique<ObjdumpOptTable>();
Unknown = OBJDUMP_UNKNOWN;
HelpFlag = OBJDUMP_help;
HelpHiddenFlag = OBJDUMP_help_hidden;
VersionFlag = OBJDUMP_version;
}
BumpPtrAllocator A;
StringSaver Saver(A);
opt::InputArgList InputArgs =
T->parseArgs(argc, argv, Unknown, Saver,
[&](StringRef Msg) { reportCmdLineError(Msg); });
if (InputArgs.size() == 0 || InputArgs.hasArg(HelpFlag)) {
T->printHelp(ToolName);
return 0;
}
if (InputArgs.hasArg(HelpHiddenFlag)) {
T->printHelp(ToolName, /*show_hidden=*/true);
return 0;
}
// Initialize targets and assembly printers/parsers.
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllDisassemblers();
if (InputArgs.hasArg(VersionFlag)) {
cl::PrintVersionMessage();
if (!Is("otool")) {
outs() << '\n';
TargetRegistry::printRegisteredTargetsForVersion(outs());
}
return 0;
}
if (Is("otool"))
parseOtoolOptions(InputArgs);
else
parseObjdumpOptions(InputArgs);
if (StartAddress >= StopAddress)
reportCmdLineError("start address should be less than stop address");
// Removes trailing separators from prefix.
while (!Prefix.empty() && sys::path::is_separator(Prefix.back()))
Prefix.pop_back();
if (AllHeaders)
ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
SectionHeaders = SymbolTable = true;
if (DisassembleAll || PrintSource || PrintLines ||
!DisassembleSymbols.empty())
Disassemble = true;
if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
!DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
!Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
!DynamicSymbolTable && !UnwindInfo && !FaultMapSection &&
!(MachOOpt &&
(Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie ||
FirstPrivateHeader || FunctionStarts || IndirectSymbols || InfoPlist ||
LazyBind || LinkOptHints || ObjcMetaData || Rebase || Rpaths ||
UniversalHeaders || WeakBind || !FilterSections.empty()))) {
T->printHelp(ToolName);
return 2;
}
DisasmSymbolSet.insert(DisassembleSymbols.begin(), DisassembleSymbols.end());
llvm::for_each(InputFilenames, dumpInput);
warnOnNoMatchForSections();
return EXIT_SUCCESS;
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index c7ff897b27b6..b12c539ee764 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,14 +1,14 @@
// $FreeBSD$
-#define LLVM_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define CLANG_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a-1200014"
+#define LLD_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5-1200014"
#define LLD_REPOSITORY "FreeBSD"
-#define LLDB_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLDB_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 1257fe7db9e9..ef7b3966a338 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-13.0.0-rc1-97-g23ba3732246a"
+#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
